0 Load Packages¶

In [ ]:
%load_ext autotime
time: 52.9 µs (started: 2024-04-26 03:53:46 -07:00)
In [ ]:
%load_ext watermark
%watermark
Last updated: 2024-04-26T03:53:55.133962-07:00

Python implementation: CPython
Python version       : 3.11.8
IPython version      : 8.20.0

Compiler    : GCC 12.3.0
OS          : Linux
Release     : 4.18.0-477.15.1.el8_8.x86_64
Machine     : x86_64
Processor   : x86_64
CPU cores   : 64
Architecture: 64bit

time: 343 ms (started: 2024-04-26 03:53:54 -07:00)
In [ ]:
import polars as pl
import warnings
warnings.filterwarnings('ignore')

import os
from rich import print
import datetime

from sklearnex import patch_sklearn
patch_sklearn()

from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist

from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import ipywidgets as widgets
from ipywidgets import interact, interact_manual

# import panel as pn
# pn.extension()
Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
time: 50 s (started: 2024-04-26 03:54:06 -07:00)
In [ ]:
%watermark --iversions
polars    : 0.20.17
matplotlib: 3.8.2
pandas    : None
ipywidgets: 8.1.1
numpy     : 1.26.3

time: 99 ms (started: 2024-04-26 03:55:01 -07:00)

1 Data and Filter¶

In [ ]:
# READ IN DATA and DESCRIBE
RATING_FILE_PATH = '../data/ratings.csv'  # ml-32m latest curated dataset
df32m = pl.read_csv(RATING_FILE_PATH)
df32m = df32m.with_columns(
    pl.col('movieId').cast(pl.Utf8).cast(pl.Categorical),
    pl.col('userId').cast(pl.Utf8).cast(pl.Categorical),
    pl.from_epoch(pl.col('timestamp'), time_unit='s')
)
with pl.Config(tbl_rows=20):
    print(f"{df32m.describe(percentiles=[.01, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .99])=}")
df32m.describe(percentiles=[.01, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .99])=shape: (19, 5)
┌────────────┬──────────┬──────────┬─────────────┬────────────────────────────┐
│ statistic  ┆ userId   ┆ movieId  ┆ rating      ┆ timestamp                  │
│ ---        ┆ ---      ┆ ---      ┆ ---         ┆ ---                        │
│ str        ┆ str      ┆ str      ┆ f64         ┆ str                        │
╞════════════╪══════════╪══════════╪═════════════╪════════════════════════════╡
│ count      ┆ 32000204 ┆ 32000204 ┆ 3.2000204e7 ┆ 32000204                   │
│ null_count ┆ 0        ┆ 0        ┆ 0.0         ┆ 0                          │
│ mean       ┆ null     ┆ null     ┆ 3.540396    ┆ 2010-05-30 17:39:59.573263 │
│ std        ┆ null     ┆ null     ┆ 1.058986    ┆ null                       │
│ min        ┆ null     ┆ null     ┆ 0.5         ┆ 1995-01-09 11:46:44        │
│ 1%         ┆ null     ┆ null     ┆ 0.5         ┆ 1996-06-10 17:26:52        │
│ 10%        ┆ null     ┆ null     ┆ 2.0         ┆ 1999-11-17 10:23:31        │
│ 20%        ┆ null     ┆ null     ┆ 3.0         ┆ 2001-08-10 16:24:23        │
│ 25%        ┆ null     ┆ null     ┆ 3.0         ┆ 2003-04-22 11:53:50        │
│ 30%        ┆ null     ┆ null     ┆ 3.0         ┆ 2004-10-16 05:15:06        │
│ 40%        ┆ null     ┆ null     ┆ 3.5         ┆ 2007-01-26 19:22:44        │
│ 50%        ┆ null     ┆ null     ┆ 3.5         ┆ 2010-04-30 10:03:49        │
│ 60%        ┆ null     ┆ null     ┆ 4.0         ┆ 2015-02-01 22:28:39        │
│ 70%        ┆ null     ┆ null     ┆ 4.0         ┆ 2016-10-13 13:27:36        │
│ 75%        ┆ null     ┆ null     ┆ 4.0         ┆ 2017-08-19 15:59:05        │
│ 80%        ┆ null     ┆ null     ┆ 4.5         ┆ 2018-10-03 07:21:40        │
│ 90%        ┆ null     ┆ null     ┆ 5.0         ┆ 2020-11-05 19:45:35        │
│ 99%        ┆ null     ┆ null     ┆ 5.0         ┆ 2023-06-28 02:27:13        │
│ max        ┆ null     ┆ null     ┆ 5.0         ┆ 2023-10-13 02:29:07        │
└────────────┴──────────┴──────────┴─────────────┴────────────────────────────┘
time: 10.6 s (started: 2024-04-26 03:55:21 -07:00)
In [ ]:
@interact

########################### Filtering the Raw Dataset: remove obsolete ratings ###########################
#### Filter 1: Recent Ratings, timestamp > cutoff_data = 2014-01-01
#### Filter 2: Movies with more than cut_movie = 200 ratings
#### Filter 3: Users with more than cut_user = 40 ratings
#### We are filtering step by step to guarantee the result aligns with what we need. While filtering all at once
#### (all filters applied on df32m) will give us a much smaller dataset that is not useful.

def show_recent_ratings(cutoff_date = '2014-01-01',
                        cut_movie = widgets.IntSlider(min=1, max=3000, step=1, value=200),
                        cut_user = widgets.IntSlider(min=1, max=3000, step=1, value=40)):
#   cutoff_date        = '2014-01-01'
#   cut_movie          = 200
#   cut_user           = 40
    df32m              = pl.read_csv(RATING_FILE_PATH)
    cutoff_date        = int(datetime.datetime.strptime(cutoff_date, "%Y-%m-%d").replace(tzinfo=datetime.timezone.utc).timestamp())
    f1_df              = df32m.filter(pl.col('timestamp') >= cutoff_date)
    movie_counts       = f1_df.group_by('movieId').agg(pl.len().alias('count'))
    filtered_movie_ids = movie_counts.filter(pl.col('count') >= cut_movie).select('movieId')
    f2_df              = f1_df.join(filtered_movie_ids, on='movieId', how='inner')
    user_counts        = f2_df.group_by('userId').agg(pl.len().alias('count'))
    filtered_user_ids  = user_counts.filter(pl.col('count') >= cut_user).select('userId')
    df                 = f2_df.join(filtered_user_ids, on='userId', how='inner')
    print(f"Original: \t{df32m.shape[0]:,}")
    print(f"Cutting date: \t{f1_df.shape[0]:,}")
    print(f"Cutting movie: \t{f2_df.shape[0]:,}")
    print(f"Cutting user: \t{df.shape[0]:,}")
    print(f"# of Ratings: \t{df.shape[0]:,}")
    print(f"# of Users: \t{df['userId'].n_unique():,}")
    print(f"# of Movies: \t{df['movieId'].n_unique():,}")
    print(f"Sparsity: \t{1-df.shape[0]/df['userId'].n_unique()/df['movieId'].n_unique():.4n}")
    print(f"User Ratings Count: \t{df['userId'].value_counts().describe()}")
    print(f"Movie Ratings Count: \t{df['movieId'].value_counts().describe()}")
    print(df)
interactive(children=(Text(value='2014-01-01', description='cutoff_date'), IntSlider(value=200, description='c…
time: 1.64 s (started: 2024-04-26 03:55:49 -07:00)
In [ ]:
# use the picked cutoff values for df and pt
cutoff_date        = '2014-01-01'
cut_movie          = 200
cut_user           = 40
df32m              = pl.read_csv(RATING_FILE_PATH)
cutoff_date        = int(datetime.datetime.strptime(cutoff_date, "%Y-%m-%d").replace(tzinfo=datetime.timezone.utc).timestamp())
f1_df              = df32m.filter(pl.col('timestamp') >= cutoff_date)
df32m              = df32m.with_columns(pl.from_epoch(pl.col('timestamp'), time_unit='s'))
f1_df              = f1_df.with_columns(pl.from_epoch(pl.col('timestamp'), time_unit='s'))
movie_counts       = f1_df.group_by('movieId').agg(pl.len().alias('count')).sort('count', descending=True)
filtered_movie_ids = movie_counts.filter(pl.col('count') >= cut_movie).select('movieId')
f2_df              = f1_df.join(filtered_movie_ids, on='movieId', how='inner')
user_counts        = f2_df.group_by('userId').agg(pl.len().alias('count')).sort('count', descending=True)
filtered_user_ids  = user_counts.filter(pl.col('count') >= cut_user).select('userId')
df                 = f2_df.join(filtered_user_ids, on='userId', how='inner')
time: 972 ms (started: 2024-04-26 03:58:27 -07:00)
In [ ]:
movie_counts = movie_counts.join(pl.read_csv('../data/movie_links_87461_title_genre.csv'), on='movieId', how='inner').sort('count', descending=True)
time: 304 ms (started: 2024-04-26 03:58:47 -07:00)
In [ ]:
movie_counts
Out[ ]:
shape: (83_882, 6)
movieIdcountimdbIdtmdbIdtitlegenres
i64u32i64i64strstr
7913247695137566627205"Inception (201…"Action|Crime|D…
257147209133093603"Matrix, The (1…"Action|Sci-Fi|…
31844585111161278"Shawshank Rede…"Crime|Drama"
5855942725468569155"Dark Knight, T…"Action|Crime|D…
295941295137523550"Fight Club (19…"Action|Crime|D…
………………
2927311268125101032473"The Monroy Aff…"Drama"
292737114907358986674"Shelter in Sol…"Comedy|Drama"
292753112388280948139"Orca (2023)""Drama"
292755164027182776"The Angry Bree…"Drama"
2927571289955661174725"Race to the Su…"Action|Adventu…
time: 3.96 ms (started: 2024-04-26 03:59:14 -07:00)
In [ ]:
user_counts
Out[ ]:
shape: (76_311, 2)
userIdcount
i64u32
1753254985
227444473
170353984
158753898
437033837
……
1703931
1735371
464011
654051
1239951
time: 1.83 ms (started: 2024-04-26 03:59:41 -07:00)
In [ ]:
filtered_user_ids
Out[ ]:
shape: (56_318, 1)
userId
i64
175325
22744
17035
15875
43703
…
71434
57670
86246
164766
200812
time: 1.66 ms (started: 2024-04-26 04:00:10 -07:00)
In [ ]:
filtered_movie_ids
Out[ ]:
shape: (5_981, 1)
movieId
i64
79132
2571
318
58559
2959
…
115967
251922
7976
67620
185997
time: 1.59 ms (started: 2024-04-26 05:54:07 -07:00)
In [ ]:
df.shape
Out[ ]:
(11727759, 4)
time: 1.37 ms (started: 2024-04-26 04:00:47 -07:00)
In [ ]:
#df.write_csv('../data/ratings_12m_filtered.csv')
time: 1.17 s (started: 2024-04-25 22:50:02 -07:00)

1.1 visualize the filtering process¶

1.1.1 the original df32m data¶

In [ ]:
%matplotlib inline
import plotly.express as px
# create a interactive 3D plot, with the x-axis being the 'timestamp', y-axis being the 'movieId', and z-axis being the 'userId'
# let the size of the points be the 'rating'
# add a plane to cut the plot in half, at timestamp=2014-01-01
# sample 1000 points to make the plot more interactive
plt_df = df32m.sample(1000).sort('timestamp')
plt_df = plt_df.with_columns(
    pl.col('movieId').cast(pl.Int64),
    pl.col('userId').cast(pl.Int64),
)
fig = px.scatter_3d(plt_df, x='timestamp', y='movieId', z='userId', size='rating', color='rating')
fig.update_traces(marker=dict(line=dict(width=4, color='DarkSlateGrey')))
fig.update_layout(scene = dict(
                    xaxis_title='timestamp',
                    yaxis_title='movieId',
                    zaxis_title='userId'),
                  showlegend=False)
fig.show()
time: 20.4 s (started: 2024-04-26 04:01:08 -07:00)

1.1.2 cut to 2014-01-01 and later, f1_df¶

In [ ]:
%matplotlib inline
import plotly.express as px
# create a interactive 3D plot, with the x-axis being the 'timestamp', y-axis being the 'movieId', and z-axis being the 'userId'
# let the size of the points be the 'rating'
# add a plane to cut the plot in half, at timestamp=2014-01-01
# sample 1000 points to make the plot more interactive
plt_df = f1_df.sample(1000).sort('timestamp')
plt_df = plt_df.with_columns(
    pl.col('movieId').cast(pl.Int64),
    pl.col('userId').cast(pl.Int64),
)
fig = px.scatter_3d(plt_df, x='timestamp', y='movieId', z='userId', size='rating', color='rating')
fig.update_traces(marker=dict(line=dict(width=4, color='DarkSlateGrey')))
fig.update_layout(scene = dict(
                    xaxis_title='timestamp',
                    yaxis_title='movieId',
                    zaxis_title='userId'),
                  showlegend=False)
fig.show()
time: 96.8 ms (started: 2024-04-26 04:01:47 -07:00)

1.1.3 cut to movies with at least 200 votes, f2_df¶

In [ ]:
%matplotlib inline
import plotly.express as px
# create a interactive 3D plot, with the x-axis being the 'timestamp', y-axis being the 'movieId', and z-axis being the 'userId'
# let the size of the points be the 'rating'
# add a plane to cut the plot in half, at timestamp=2014-01-01
# sample 1000 points to make the plot more interactive
plt_df = f2_df.sample(1000).sort('timestamp')
plt_df = plt_df.with_columns(
    pl.col('movieId').cast(pl.Int64),
    pl.col('userId').cast(pl.Int64),
)
fig = px.scatter_3d(plt_df, x='timestamp', y='movieId', z='userId', size='rating', color='rating')
fig.update_traces(marker=dict(line=dict(width=4, color='DarkSlateGrey')))
fig.update_layout(scene = dict(
                    xaxis_title='timestamp',
                    yaxis_title='movieId',
                    zaxis_title='userId'),
                  showlegend=False)
fig.show()
time: 41.2 ms (started: 2024-04-26 04:02:23 -07:00)

1.1.4 cut to users with at least 40 votes, df¶

In [ ]:
%matplotlib inline
import plotly.express as px
# create a interactive 3D plot, with the x-axis being the 'timestamp', y-axis being the 'movieId', and z-axis being the 'userId'
# let the size of the points be the 'rating'
# add a plane to cut the plot in half, at timestamp=2014-01-01
# sample 1000 points to make the plot more interactive
plt_df = df.sample(1000).sort('timestamp')
plt_df = plt_df.with_columns(
    pl.col('movieId').cast(pl.Int64),
    pl.col('userId').cast(pl.Int64),
)
fig = px.scatter_3d(plt_df, x='timestamp', y='movieId', z='userId', size='rating', color='rating')
fig.update_traces(marker=dict(line=dict(width=4, color='DarkSlateGrey')))
fig.update_layout(scene = dict(
                    xaxis_title='timestamp',
                    yaxis_title='movieId',
                    zaxis_title='userId'),
                  showlegend=False)
fig.show()
time: 41.5 ms (started: 2024-04-26 04:02:49 -07:00)
In [ ]:
# 2d plot
In [ ]:
%matplotlib inline
import plotly.express as px
# create a interactive 2D plot, with the x-axis being the 'timestamp', y-axis being the 'movieId', and z-axis being the 'userId'
# let the size of the points be the 'rating'
# add a plane to cut the plot in half, at timestamp=2014-01-01
# sample 1000 points to make the plot more interactive
plt_df = df.sample(1000).sort('timestamp')
plt_df = plt_df.with_columns(
    pl.col('movieId').cast(pl.Int64),
    pl.col('userId').cast(pl.Int64),
)
fig = px.scatter(plt_df, x='movieId', y='userId', size='rating', color='rating')
fig.update_traces(marker=dict(line=dict(width=4, color='DarkSlateGrey')))
fig.update_layout(scene = dict(
                    xaxis_title='movieId',
                    yaxis_title='userId'),
                  showlegend=False)
fig.show()
time: 1.1 s (started: 2024-04-26 04:03:20 -07:00)

1.2 Pivot Table¶

In [ ]:
pt = df.pivot(index="userId", columns="movieId", values="rating")
# sort most voted users and movies on top
X = pt.select(pl.col('userId'),pl.col(movie_counts.filter(pl.col("movieId").cast(str).is_in(pt.columns))["movieId"].cast(str)))
X = X.with_columns(pl.col('userId').cast(pl.Utf8).cast(pl.Categorical))
filtered_user_ids = filtered_user_ids.with_columns(pl.col('userId').cast(pl.Utf8).cast(pl.Categorical))
X = filtered_user_ids.join(X, on='userId', how='left')
X
Out[ ]:
shape: (56_318, 5_980)
userId791322571318585592959715349933561094875952296260593991141196600696895485813413091529527119868157357872998106782112852593154226653968744306337946377744581704…677089286427866304989569910611666011876013810414164815895619210919395420585728190452210812946327925594685790938199486797194116668169252203619208807224983813952797667620115967127134185997251922
catf64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64…f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64
"175325"4.04.04.54.53.54.04.03.54.04.04.54.04.04.04.04.04.04.54.54.04.54.04.03.53.54.04.03.54.04.04.03.54.04.04.04.5…3.03.53.5null4.02.53.54.0null3.0null3.53.0nullnull4.03.53.55.03.04.02.52.5null3.54.03.5nullnull1.53.02.0null3.02.03.0null
"22744"4.05.03.05.05.05.05.04.04.05.05.05.05.03.05.04.04.05.04.05.00.55.03.03.03.04.04.04.04.04.05.05.05.05.03.03.0…nullnullnull2.5null4.0nullnull2.02.01.0null0.5nullnull0.51.02.0null0.5nullnull3.0null2.0nullnullnullnull2.0nullnullnull2.02.03.0null
"17035"1.51.55.04.54.54.03.03.52.03.55.00.55.04.00.51.04.05.04.00.54.54.54.54.51.54.00.54.04.0null4.54.00.54.04.04.0…2.01.0null0.5null3.5null0.5null0.5nullnullnull2.5null2.04.5nullnull0.52.53.03.5nullnullnullnull3.5null4.03.5null3.52.53.5nullnull
"15875"3.05.05.04.05.04.04.03.54.04.05.04.03.54.54.53.03.54.05.04.04.04.05.02.53.04.04.04.05.03.04.04.03.53.54.02.5…3.03.0nullnullnullnullnullnullnullnull2.54.02.0nullnull4.03.0null3.0nullnull3.01.5null3.5nullnullnullnull3.01.53.5null3.0nullnullnull
"43703"2.55.03.54.03.53.02.52.53.52.53.53.55.03.55.04.03.54.02.53.53.53.54.02.52.04.02.53.53.53.03.53.03.52.53.03.5…1.03.03.01.5nullnullnullnullnullnullnull1.5null1.5null2.0nullnull2.0null2.52.0nullnull2.5null2.0null2.0nullnullnullnull2.0nullnull1.5
………………………………………………………………………………………………………………………………………………………………………………………………………
"71434"nullnullnullnullnullnullnullnullnullnull5.04.55.0nullnullnullnull5.0nullnull5.0nullnullnullnullnullnullnull5.0nullnullnullnullnullnullnull…nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull
"57670"4.54.5nullnullnullnullnullnull4.5nullnull4.5nullnull5.05.04.5null4.5nullnull4.0nullnull4.0null4.54.5nullnullnullnullnullnullnullnull…nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull
"86246"nullnullnull5.0nullnull3.0nullnullnullnullnullnullnullnullnullnullnullnull4.0nullnullnullnull4.0nullnull4.0null3.5nullnull4.5nullnullnull…nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull
"164766"4.5nullnull4.54.0nullnullnull4.5nullnullnullnullnullnullnullnullnull4.5nullnullnullnullnullnull4.0nullnull4.5nullnullnull4.0null4.0null…nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull
"200812"4.0null5.0nullnull4.54.5nullnullnullnullnullnullnullnull2.0nullnullnullnullnullnull4.0nullnullnullnullnullnull3.0null2.5nullnullnullnull…nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull
time: 3.1 s (started: 2024-04-26 04:07:08 -07:00)
In [ ]:
def calculate_sparsity(df: pl.DataFrame) -> float:
    """
    Calculate the sparsity of a Polars DataFrame.
    Args:
    df (pl.DataFrame): The Polars DataFrame for which to calculate sparsity.
    Returns:
    float: The sparsity of the DataFrame.
    """
    # Total elements is the number of rows multiplied by the number of columns
    total_elements = df.height * df.width
    # Count non-missing, non-zero values
    missing = sum(df.null_count())
    # Sparsity is 1 minus the proportion of non-zero, non-missing entries
    sparsity = missing / total_elements
    return sparsity

# Example usage with a dummy DataFrame:
dummy_data = pl.DataFrame({
    'A': [0, 0, 1, None],
    'B': [1, 2, 0, 0],
    'C': [None, None, None, None]
})

calculate_sparsity(dummy_data)
Out[ ]:
shape: (1,)
A
f64
0.416667
time: 2.27 ms (started: 2024-04-26 11:45:58 -07:00)
In [ ]:
print(calculate_sparsity(X[:10000,:]))
print(calculate_sparsity(X[:10000,:1001]))
shape: (1,)
Series: 'userId' [f64]
[
        0.895131
]
shape: (1,)
Series: 'userId' [f64]
[
        0.646962
]
time: 27.8 ms (started: 2024-04-26 11:47:22 -07:00)

2 Clustering¶

2.0 Over-simplifying the market as a whole¶

In [ ]:
# One Centroid represents the whole market. Each Movie's mean rating is its loading for this whole cluster
loadings = X.mean()
loadings
Out[ ]:
shape: (1, 5_980)
userId791322571318585592959715349933561094875952296260593991141196600696895485813413091529527119868157357872998106782112852593154226653968744306337946377744581704…677089286427866304989569910611666011876013810414164815895619210919395420585728190452210812946327925594685790938199486797194116668169252203619208807224983813952797667620115967127134185997251922
catf64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64…f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64
null4.1328164.118844.3285114.1337164.2186584.033314.0281874.0762314.1127614.017894.2264283.9570794.0824264.0084244.0460463.9784853.9013114.2196314.0061423.9339314.081143.9343574.0287353.9563863.5121073.9187743.8977363.7491194.0835593.6880883.8028613.598613.8692683.7393083.9911134.135534…3.6040613.35753.852.5247523.6450782.783.5338543.6381913.40753.1947372.8535353.89252.7806123.3109453.2550513.3826533.5104173.4355673.8704662.7562193.3877553.282.952023.873.2064683.9923863.66753.41253.2537693.0353.4070353.1994823.4924243.4772733.252.7219392.645729
time: 56.8 ms (started: 2024-04-26 05:18:12 -07:00)
In [ ]:
# finetuning for the whole market:
# {"system":"use this movie plot to predict the average market reaction in a scale from 0.5 to 5: {{plot}} ", "answer": "{{loading}}"}

# example:
# {"question": "use this move plot "}

2.1 Over-complicating, we have personalization¶

However, in the real world, we have 99.8% missing values such that we cannot reach personalization without introducing biases.

In [ ]:
 

2.2 Somewhere in the middle,¶

In [ ]:
 # now we want to find a middle ground
 # a K-group users, with distinction and representation
time: 210 µs (started: 2024-04-26 05:49:23 -07:00)
In [ ]:
df_with_titles = df.join(movie_counts, on='movieId')
Out[ ]:
shape: (11_727_113, 9)
userIdmovieIdratingtimestampcountimdbIdtmdbIdtitlegenres
i64i64f64datetime[μs]u32i64i64strstr
1013203.02015-03-22 15:58:1933901036448077"Alien³ (a.k.a.…"Action|Horror|…
1020033.52017-03-18 20:26:28352787363927"Gremlins (1984…"Comedy|Horror"
1029853.02014-11-01 19:48:254936938705548"RoboCop (1987)…"Action|Crime|D…
10600402.52014-08-07 15:45:1655938000801724"Incredible Hul…"Action|Sci-Fi"
10687912.02016-06-11 14:32:443984438488534"Terminator Sal…"Action|Adventu…
………………………
2009471167973.52017-05-28 12:02:34244602084970205596"The Imitation …"Drama|Thriller…
2009471228823.52017-05-28 12:13:5221315139219076341"Mad Max: Fury …"Action|Adventu…
2009471348535.02017-05-28 12:11:46214832096673150540"Inside Out (20…"Adventure|Anim…
2009471520813.02017-05-28 12:04:17144312948356269149"Zootopia (2016…"Action|Adventu…
2009471649094.52017-05-28 12:16:1572313783958313369"La La Land (20…"Comedy|Drama|R…
time: 67.4 ms (started: 2024-04-26 05:55:27 -07:00)
In [ ]:
df_with_titles = df.join(movie_counts, on='movieId')
time: 64.2 ms (started: 2024-04-26 06:03:52 -07:00)
In [ ]:
# to pandas
df_with_titles_pd = df_with_titles.to_pandas()
df_with_titles_pd = df_with_titles_pd.groupby('userId').apply(lambda x: ';'.join(['{} {}'.format(a, b) for a, b in zip(x['title'], x['rating'])])).reset_index().rename(columns={0:'pattern'})
df_with_titles_pd
Out[ ]:
userId pattern
0 10 Alien³ (a.k.a. Alien 3) (1992) 3.0;Gremlins (1...
1 16 Braveheart (1995) 1.0;Bad Boys (1995) 1.5;Forr...
2 20 Toy Story (1995) 5.0;Twelve Monkeys (a.k.a. 12...
3 22 Star Wars: Episode IV - A New Hope (1977) 4.0;...
4 23 Toy Story (1995) 3.0;Star Wars: Episode IV - A...
... ... ...
56313 200933 Dead Man Walking (1995) 4.5;Chungking Express ...
56314 200943 Art of War, The (2000) 1.5;Finding Forrester (...
56315 200944 Toy Story (1995) 4.0;Seven (a.k.a. Se7en) (199...
56316 200945 Get Shorty (1995) 4.0;Twelve Monkeys (a.k.a. 1...
56317 200947 Toy Story (1995) 4.0;Braveheart (1995) 4.5;Tax...

56318 rows × 2 columns

time: 12.1 s (started: 2024-04-26 06:55:50 -07:00)
In [ ]:
df_with_titles_pd.pattern.apply(lambda x: len(x)).max()
Out[ ]:
148335
time: 18.2 ms (started: 2024-04-26 06:57:20 -07:00)
In [ ]:
df_56k_user_patterns = df_with_titles_pd.set_index('userId')
time: 32.8 ms (started: 2024-04-26 07:32:04 -07:00)
In [ ]:
# use openai embeddings to convert patterns into embeddings
In [ ]:
import latentscope as ls
# ls.init('../ls_embeddings',openai_key="sk-proj-GlQIJefzzR8hTixZXDO4T3BlbkFJFFDsaVyztKblDis7N4b3") # tier-1 limit
ls.init('../ls_embeddings',openai_key="sk-proj-ENEqiPGjjZ1JwkfBWibTT3BlbkFJ5jxzolr8Z8EFq1pptjRt") # tier-4 limit
ls.ingest('56k-users', df_56k_user_patterns, 'pattern')
Initialized env with data directory at ../ls_embeddings
Loading environment variables from: /dfs6/pub/mgu3/____ML/ml-32m/ML32M10K/code/.env
DATA DIR ../ls_embeddings
DIRECTORY ../ls_embeddings/56k-users
                                             pattern
0  Alien³ (a.k.a. Alien 3) (1992) 3.0;Gremlins (1...
1  Braveheart (1995) 1.0;Bad Boys (1995) 1.5;Forr...
2  Toy Story (1995) 5.0;Twelve Monkeys (a.k.a. 12...
3  Star Wars: Episode IV - A New Hope (1977) 4.0;...
4  Toy Story (1995) 3.0;Star Wars: Episode IV - A...
                                                 pattern
56313  Dead Man Walking (1995) 4.5;Chungking Express ...
56314  Art of War, The (2000) 1.5;Finding Forrester (...
56315  Toy Story (1995) 4.0;Seven (a.k.a. Se7en) (199...
56316  Get Shorty (1995) 4.0;Twelve Monkeys (a.k.a. 1...
56317  Toy Story (1995) 4.0;Braveheart (1995) 4.5;Tax...
Index(['pattern'], dtype='object')
wrote ../ls_embeddings/56k-users/input.parquet
time: 2.83 s (started: 2024-04-26 07:32:44 -07:00)
wrote ../ls_embeddings/56k-users/input.parquet
time: 2.83 s (started: 2024-04-26 07:32:44 -07:00)
In [ ]:
%%capture

ls.serve()
time: 3h 51min 57s (started: 2024-04-26 07:32:56 -07:00)
In [ ]:
# load openai embeddings 001.h5 and do UMAP and HDBSCAN to get clusters.
# eyeball whether natural clusters exist
In [ ]:
 
In [ ]:
 
In [ ]:
 

2.2.1 Not Filling in Missings¶

2.2.1.1 Non-Negative Matrix Factorization¶

Singular Value Decomposition (SVD)¶

$$ \hat{r}_{ui} = q_i^T p_u $$ where user and item factors are kept positive.

Surprise SVD Algo Explanation

alt text

Leskovec, Rajarman, and Ullman

In [ ]:
X_fill_mean = X.fill_null(strategy='mean')
X_fill_zero = X.fill_null(strategy='zero')
X_fill_min  = X.fill_null(strategy='min')
time: 241 ms (started: 2024-04-26 04:10:16 -07:00)
In [ ]:
from sklearn.decomposition import NMF

A = X_fill_zero[:,1:]

# Initialize NMF and fit it to the matrix A
nmf = NMF(n_components=2, init='random', random_state=0)
W = nmf.fit_transform(A)
H = nmf.components_

# Print the resulting matrices
print(f"{W[:2]=}")
print(f"{H[:2]=}")
W:
 [[0.         0.83260542]
 [0.         2.49781625]
 [0.         3.33042167]
 [0.         4.16302709]
 [1.63417451 0.35754022]
 [1.97409297 0.        ]
 [0.81708726 0.17877011]]
H:
 [[0.         0.44234624 0.         2.49355028 2.49355028]
 [1.19564514 1.21171687 1.19564514 0.         0.        ]]
time: 21.4 ms (started: 2024-04-25 22:34:39 -07:00)
In [ ]:
from sklearn.decomposition import NMF
from sklearn.model_selection import GridSearchCV
import numpy as np

# Sample data (for example purposes, in practice your data will be different)
X = X_fill_zero[:, 1:]

# Parameters to search over
param_grid = {
    'n_components': [10, 15, 20, 25, 30, 35, 40, 45, 50],  # number of components
    'init': ['random', 'nndsvd'],  # initialization method
    'l1_ratio': [0, 0.5, 1],  # regularization mixing parameter
}

# Note: NMF does not have a 'score' method usually required by GridSearchCV
# Here we create a custom scorer function. In this case, it could be the NMF's
# reconstruction error itself. For the purpose of GridSearchCV, we need to
# make it a function that accepts the estimator, X, and y (which is ignored).
def nmf_score(estimator, X, y=None):
    # Compute the reconstruction error, the lower, the better
    reconstruction_error = np.linalg.norm(X - estimator.transform(X).dot(estimator.components_))
    return -reconstruction_error  # GridSearchCV maximizes the score so return the negative error

# Create the NMF instance
nmf = NMF()

# Create the GridSearchCV instance
grid_search = GridSearchCV(estimator=nmf, param_grid=param_grid, scoring=nmf_score, cv=3)

# Perform grid search (this may take some time depending on your data size and parameter grid)
grid_search.fit(X)

# Best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best parameters found:", best_params)
print("Best score found:", best_score)
Best parameters found:
{'init': 'random', 'l1_ratio': 0, 'n_components': 10}
Best score found: nan
time: 1h 7min 56s (started: 2024-04-25 23:03:26 -07:00)
In [ ]:
from sklearn.decomposition import NMF
from sklearn.model_selection import GridSearchCV
import numpy as np

# Sample data (for example purposes, in practice your data will be different)
A = X_fill_zero[:, 1:]

# Parameters to search over
param_grid = {
    'n_components': [8, 9,10, 11, 12, 13, 14, 15],  # number of components
    'init': ['random', 'nndsvd'],  # initialization method
    'l1_ratio': [0, 0.5, 1],  # regularization mixing parameter
}

# Note: NMF does not have a 'score' method usually required by GridSearchCV
# Here we create a custom scorer function. In this case, it could be the NMF's
# reconstruction error itself. For the purpose of GridSearchCV, we need to
# make it a function that accepts the estimator, X, and y (which is ignored).
def nmf_score(estimator, X, y=None):
    # Compute the reconstruction error, the lower, the better
    reconstruction_error = np.linalg.norm(X - estimator.transform(X).dot(estimator.components_))
    return -reconstruction_error  # GridSearchCV maximizes the score so return the negative error

# Create the NMF instance
nmf = NMF()

# Create the GridSearchCV instance
grid_search = GridSearchCV(estimator=nmf, param_grid=param_grid, scoring=nmf_score, cv=3)

# Perform grid search (this may take some time depending on your data size and parameter grid)
grid_search.fit(A)

# Best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best parameters found:", best_params)
print("Best score found:", best_score)
Best parameters found:
{'init': 'random', 'l1_ratio': 0, 'n_components': 8}
Best score found: nan
time: 33min 45s (started: 2024-04-26 04:13:30 -07:00)
In [ ]:
# NMF n=10
In [ ]:
from sklearn.decomposition import NMF

A = X_fill_zero[:,1:]

# Initialize NMF and fit it to the matrix A
nmf = NMF(n_components=10, init='random', random_state=0)
U = nmf.fit_transform(A)
M = nmf.components_

# Print the resulting matrices
print(f"{U[:2]=}")
print(f"{M[:2]=}")
U[:2]=array([[2.9517392 , 7.56836747, 0.        , 0.60258417, 0.55196251,
        0.        , 0.02226424, 0.97733986, 0.        , 4.84292076],
       [4.40771523, 9.0685571 , 0.        , 0.53739161, 0.68046949,
        0.        , 0.        , 0.61939399, 0.1306364 , 1.24826888]])
M[:2]=array([[0.        , 0.09362535, 0.        , ..., 0.00819534, 0.        ,
        0.02706899],
       [0.13194742, 0.10950007, 0.15303912, ..., 0.01518143, 0.01983315,
        0.00635778]])
time: 19.2 s (started: 2024-04-26 11:51:40 -07:00)
In [ ]:
# use U to cluster users into 10 groups
# jump to 2.3 KMeans
time: 184 µs (started: 2024-04-26 11:54:02 -07:00)
In [ ]:
print(np.load('../data/U.npy')[:2])
print(np.load('../data/M.npy')[:2])
[[2.9465047  7.57024189 0.         0.60239043 0.55344382 0.
  0.02233019 0.9683429  0.         4.84344654]
 [4.39981346 9.07085948 0.         0.53721187 0.68230197 0.
  0.         0.61367887 0.13100777 1.24841661]]
[[0.         0.09379923 0.         ... 0.00821147 0.         0.02712022]
 [0.13191784 0.10947779 0.153014   ... 0.01517629 0.01982737 0.00635432]]
time: 51.5 ms (started: 2024-04-26 00:23:36 -07:00)
In [ ]:
# write U and M to disk,
np.save('../data/U.npy', U)
np.save('../data/M.npy', M)
time: 77.9 ms (started: 2024-04-26 00:21:33 -07:00)
In [ ]:
# grid search to find the best svd hyper-parameters
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline, make_union
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.metrics import f1_score, make_scorer
time: 437 µs (started: 2024-04-25 16:41:57 -07:00)
In [ ]:
# example SVD n_c=2:
data = X_fill_zero[:,1:]

# Initialize SVD with desired number of components
# For example, reduce dimensions to 2
svd = TruncatedSVD(n_components=2)

# Fit SVD on the data
svd.fit(data)

# Transform the data
reduced_data = svd.transform(data)

# Explained variance ratio of each component
explained_variance_ratio = svd.explained_variance_ratio_

# Explained variance
explained_variance = svd.explained_variance_

# Total variance explained by the two components
total_variance_explained = explained_variance_ratio.sum()

print("Explained Variance Ratio per Component:", explained_variance_ratio)
print("Explained Variance per Component:", explained_variance)
print("Total Variance Explained by all components:", total_variance_explained)

# Optionally, print reduced data shape and data
print("Shape of the reduced data:", reduced_data.shape)
print("Reduced data sample:", reduced_data[:5])
Explained Variance Ratio per Component: [0.09722155 0.03288487]
Explained Variance per Component: [237.2527247   80.24995381]
Total Variance Explained by all components: 0.13010641926714567
Shape of the reduced data:
(56318, 2)
Reduced data sample: [[133.00376388  31.16510487]
 [121.15507989  56.95709056]
 [ 96.17317574   7.36524487]
 [121.23997846  23.14126167]
 [ 94.88499129   6.43712727]]
time: 2.24 s (started: 2024-04-25 20:45:46 -07:00)
In [ ]:
U0, S0, Vt0 = np.linalg.svd(X_fill_zero[:,1:], full_matrices=True)
print(f"{U0.shape=}")
print(f"{Vt0.shape=}")
print(f"{S0.shape=}")
print(f"{S0[:10]=}")
U0.shape=(56318, 56318)
Vt0.shape=(5979, 5979)
S0.shape=(5979,)
S0[:10]=array([6237.31217074, 2152.07968921, 2049.07343725, 1619.19600204,
       1521.81169818, 1424.41457779, 1179.46393407, 1129.03339058,
       1041.86336774,  977.87115844])
time: 5min 4s (started: 2024-04-25 21:43:00 -07:00)
In [ ]:
U, S, Vt = np.linalg.svd(X_fill_zero[:,1:], full_matrices=False)
print(f"{U.shape=}")
print(f"{Vt.shape=}")
print(f"{S.shape=}")
U.shape=(56318, 5979)
Vt.shape=(5979, 5979)
S.shape=(5979,)
time: 52.1 s (started: 2024-04-25 21:34:59 -07:00)
In [ ]:
print(S[:50]) # 50 most significant components
[6237.31217074 2152.07968921 2049.07343725 1619.19600204 1521.81169818
 1424.41457779 1179.46393407 1129.03339058 1041.86336774  977.87115844
  938.79926732  893.71094347  869.90878612  846.77842883  810.2193332
  781.02267472  764.4849044   746.67560439  738.28771328  712.34653437
  689.70132419  672.08246405  654.06569961  649.68152676  632.05935215
  611.58689271  603.23255205  597.63074211  587.64633119  577.87482512
  566.97381058  559.34838485  544.52478823  543.1516472   535.8183286
  526.95349245  521.08248945  512.63979617  507.96459684  503.53502669
  502.19393197  495.46971891  489.1205354   482.4307243   480.28695211
  474.52375232  469.32982393  464.65911358  463.28967386  459.40606028]
time: 3.26 ms (started: 2024-04-25 21:48:11 -07:00)

Variations such as Truncated SVD, Random SVD, etc.¶

In [ ]:
# Truncated SVD 8,18,28,...,208:
data = X_fill_zero[:,1:]

# Initialize SVD with desired number of components
# For example, reduce dimensions to 2
for n_components in range(8, 208, 10):
    svd = TruncatedSVD(n_components=n_components)
    # Fit SVD on the data
    svd.fit(data)
    # Transform the data
    reduced_data = svd.transform(data)
    # Explained variance ratio of each component
    explained_variance_ratio = svd.explained_variance_ratio_
    # Explained variance
    explained_variance = svd.explained_variance_
    # Total variance explained by the two components
    total_variance_explained = explained_variance_ratio.sum()
    print('*'*8, f'{n_components=}','*'*8)
    print("Explained Variance Ratio per Component:", explained_variance_ratio)
    print("Explained Variance per Component:", explained_variance)
    print("Total Variance Explained by all components:", total_variance_explained)
    # Optionally, print reduced data shape and data
    print("Shape of the reduced data:", reduced_data.shape)
    print("Reduced data sample:", reduced_data[:2])
******** n_components=8 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673718 0.01475283
 0.01010284 0.0092745 ]
Explained Variance per Component: [237.2527247   80.24995498  67.96294291  46.54311131  40.84425068
  36.00178855  24.65426763  22.63284597]
Total Variance Explained by all components: 0.22789612304174614
Shape of the reduced data:
(56318, 8)
Reduced data sample: [[133.00376387  31.16636601 123.42716109  15.45883823 -23.44536389
   -6.45489709  12.82095864  48.62831849]
 [121.15507988  56.95791992  85.95726368  -8.90305672   5.83964536
   -1.95302928  -8.05168437   6.24374895]]
******** n_components=18 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927505 0.0078448  0.00695542 0.00637767 0.00568503
 0.00550605 0.00521344 0.00476878 0.00443415 0.00425041 0.00403835]
Explained Variance per Component: [237.2527247   80.24995645  67.96295032  46.54311305  40.84420683
  36.00175104  24.65460742  22.63419888  19.14390046  16.97351371
  15.5636117   13.8733453   13.43657565  12.72250805  11.63739085
  10.82080127  10.37240324   9.85490876]
Total Variance Explained by all components: 0.28297086576977293
Shape of the reduced data:
(56318, 18)
Reduced data sample: [[133.00376387  31.16636085 123.42720867  15.45900093 -23.44687303
   -6.4565844   12.81649118  48.48953461   2.20274735   2.26620521
   32.7787854   42.66543285  27.99113157  32.43558668  15.65286104
   25.74843185   9.59038797  -7.43816194]
 [121.15507988  56.95798045  85.95741523  -8.90393668   5.8355789
   -1.96477038  -8.10452038   6.01388268  31.98164497  13.15604454
   15.88449945  31.07839057  18.77301111  26.18327316   2.27267186
    7.98095153  14.79152468  12.94739324]]
******** n_components=28 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568513
 0.00550618 0.00521393 0.00476938 0.0044347  0.00425096 0.00404262
 0.00396348 0.00368588 0.00345731 0.00327683 0.00309716 0.00305912
 0.00289585 0.00268953 0.00263987 0.00258719]
Explained Variance per Component: [237.2527247   80.24995645  67.96295032  46.54311307  40.84420644
  36.0017507   24.65460609  22.63420225  19.1438969   16.97351415
  15.56368026  13.87359867  13.43689672  12.72371436  11.63886411
  10.82212789  10.3737554    9.86533288   9.6721969    8.99477401
   8.43699095   7.99654311   7.55809708   7.46526093   7.06682571
   6.56334561   6.44214799   6.31360583]
Total Variance Explained by all components: 0.3143298182170661
Shape of the reduced data:
(56318, 28)
Reduced data sample: [[133.00376387  31.16636484 123.4272117   15.45903375 -23.4470334
   -6.45676562  12.81544843  48.49115438   2.18343315   2.26291561
   32.78573574  42.52659631  28.14083058  32.19747057  15.92075381
   26.05363435   9.74021692  -6.89725431   6.57106597   1.08353349
    0.63529727  -7.69918902  13.31487261  -4.99679251  18.24149368
   -2.35083628  18.51751569   7.79806557]
 [121.15507988  56.95798555  85.95742172  -8.90386699   5.83569167
   -1.96500169  -8.10534737   6.02032055  31.95596152  13.17854582
   15.92157046  31.02292516  18.80600053  25.84258338   2.6278006
    8.15649713  14.76282105  14.01963808   2.48819273  -4.05606637
   13.95550766   0.52193685  -6.85257852   6.61923656  -8.78692127
   -8.95955255   0.17326256  16.85387057]]
******** n_components=38 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.0063777  0.00568514
 0.00550618 0.00521396 0.00476944 0.00443477 0.00425112 0.00404301
 0.00396363 0.0036867  0.00345741 0.0032783  0.00310991 0.0030595
 0.00290254 0.00271493 0.00264468 0.00258926 0.00250471 0.00242729
 0.00232917 0.00226779 0.00215216 0.00213451 0.00206695 0.00199507
 0.00192281 0.00186449]
Explained Variance per Component: [237.2527247   80.24995646  67.9629503   46.54311306  40.84420641
  36.00175066  24.65460649  22.63420232  19.14389646  16.97351655
  15.56368862  13.87361423  13.43689737  12.72377958  11.63900515
  10.82229427  10.37414842   9.86628168   9.67256569   8.99676972
   8.43722988   8.00014372   7.58921106   7.46618208   7.08316508
   6.62532471   6.45390292   6.31864634   6.11230916   5.92340169
   5.68395432   5.53414738   5.25197418   5.20890295   5.04403506
   4.86862988   4.69230413   4.54998373]
Total Variance Explained by all components: 0.33605012797396866
Shape of the reduced data:
(56318, 38)
Reduced data sample: [[133.00376387  31.16636467 123.42721112  15.45902867 -23.44702243
   -6.45675433  12.81576212  48.49110527   2.1821811    2.26902253
   32.78954314  42.51468098  28.13701326  32.20328497  15.95038119
   26.07991622   9.75461692  -6.70120187   6.65322363   1.30376486
    0.48761689  -7.7240546   12.17725229  -7.20091229  18.06477794
   -4.29203813  19.76381899   6.64369691  12.4471184    7.1778797
   15.53576508  16.65147115  11.56728922  -8.60793799  -4.66156454
   14.70019297  13.07390359  -4.9849275 ]
 [121.15507988  56.95798553  85.9574215   -8.90387458   5.83568602
   -1.96502385  -8.10539169   6.0205663   31.954539    13.17987514
   15.92834629  31.0182058   18.80915723  25.84513447   2.62678745
    8.19036588  14.74813615  14.05672546   2.43345142  -4.07784751
   13.69944389   0.80554797  -3.85595569   8.55227622  -9.30079318
  -10.8233526    1.32787331  16.25863232  -2.05962705   1.5755995
   18.44027719   4.57085988   0.48190616   0.15928762   5.99198621
    4.70183059  -3.83517568  -5.7695013 ]]
******** n_components=48 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404315
 0.00396365 0.00368676 0.00345757 0.0032786  0.0031099  0.00305956
 0.00290309 0.00271648 0.00264489 0.00258996 0.00250983 0.00242769
 0.00233054 0.00227212 0.00215142 0.00214006 0.00208009 0.00201451
 0.00195913 0.00190259 0.00186724 0.00182943 0.00182017 0.00175966
 0.00170704 0.00168124 0.00163504 0.00156767 0.00154982 0.00152112]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420645
  36.0017507   24.65460664  22.63420233  19.14389594  16.97351645
  15.5636779   13.87362504  13.43690347  12.72377619  11.63903287
  10.82234197  10.37420361   9.86662574   9.67261657   8.99691857
   8.43761214   8.00086685   7.58918754   7.4663288    7.08448914
   6.62911519   6.45439615   6.32036451   6.12481318   5.92436699
   5.68728794   5.54471241   5.25017176   5.22244356   5.07609946
   4.91608141   4.78091979   4.64295681   4.55667102   4.46441539
   4.44181817   4.29416307   4.1657305    4.10279346   3.99002617
   3.82563332   3.78207428   3.71204444]
Total Variance Explained by all components: 0.35311536156774
Shape of the reduced data:
(56318, 48)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590262e+01
  -2.34470212e+01 -6.45673358e+00  1.28157200e+01  4.84913044e+01
   2.18286255e+00  2.26913226e+00  3.27904160e+01  4.25131018e+01
   2.81339698e+01  3.21966896e+01  1.59421371e+01  2.61066873e+01
   9.76826113e+00 -6.71389287e+00  6.65940693e+00  1.28758923e+00
   6.05928991e-01 -7.81453799e+00  1.20437826e+01 -7.49976032e+00
   1.76667883e+01 -4.35916922e+00  2.05399928e+01  7.29665522e+00
   1.29165914e+01  7.09350112e+00  1.45003823e+01  1.52027317e+01
   1.45472372e+01 -4.38977929e+00 -7.88311430e+00  1.52424586e+01
  -1.13786291e+01 -7.27151954e+00 -9.44917435e+00 -5.06167208e+00
  -2.29842672e+00 -1.16707910e+00 -5.51253350e+00 -4.73513903e+00
   5.65140907e-01 -3.68477244e-01 -3.70733896e+00  8.95486666e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387650e+00
   5.83569174e+00 -1.96499855e+00 -8.10548047e+00  6.02060856e+00
   3.19554749e+01  1.31814635e+01  1.59294112e+01  3.10172516e+01
   1.88036760e+01  2.58376024e+01  2.62828869e+00  8.22521775e+00
   1.47655147e+01  1.40443612e+01  2.43725587e+00 -4.04568444e+00
   1.38702621e+01  7.57576088e-01 -4.00046482e+00  8.30845980e+00
  -9.74516516e+00 -1.10679932e+01  1.69858771e+00  1.59986096e+01
  -2.00753011e+00  2.30116228e+00  1.90327524e+01  3.84830324e+00
   2.27136182e+00  1.01725908e+00  3.95255755e+00  6.70212616e+00
  -3.02273020e+00  8.28049835e+00 -6.85818163e+00 -1.43645782e+00
  -7.79837632e+00  2.40284687e+00  7.50871245e-02  2.92361181e+00
   2.64602556e+00 -4.46671664e+00  3.00380013e+00  1.56913484e+01]]
******** n_components=58 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396366 0.00368676 0.00345757 0.00327867 0.00310998 0.00305991
 0.00290313 0.00271677 0.00264566 0.00259055 0.002512   0.00242943
 0.00233391 0.00227492 0.00215505 0.00214467 0.00208487 0.00201668
 0.00197051 0.00190182 0.00187564 0.00182998 0.00182778 0.00178112
 0.0017252  0.00168409 0.00166803 0.00160952 0.00158726 0.00154985
 0.00153487 0.00150022 0.00147067 0.0014341  0.00141603 0.00137657
 0.0013574  0.00134466 0.0013307  0.0013161 ]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420642
  36.00175069  24.65460662  22.63420233  19.14389657  16.97351591
  15.56368115  13.87362439  13.43690321  12.72377723  11.6390236
  10.822345    10.37420068   9.86664534   9.67263954   8.99691745
   8.43762535   8.00104006   7.58938177   7.46718745   7.08458599
   6.62982734   6.45627683   6.32180777   6.1301123    5.9286076
   5.69550692   5.55156646   5.2590445    5.23370043   5.08776834
   4.92137779   4.80870539   4.64106363   4.57718608   4.46574767
   4.46037911   4.34653058   4.21005624   4.10974775   4.07054314
   3.92774879   3.87344552   3.78214142   3.74558462   3.66103594
   3.58892197   3.49968404   3.45558976   3.35929546   3.31250342
   3.28140989   3.2473566    3.21171684]
Total Variance Explained by all components: 0.3674348405398642
Shape of the reduced data:
(56318, 58)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590260e+01
  -2.34470198e+01 -6.45674038e+00  1.28157209e+01  4.84912928e+01
   2.18312604e+00  2.26879765e+00  3.27896494e+01  4.25139377e+01
   2.81331331e+01  3.21968864e+01  1.59421061e+01  2.61147484e+01
   9.77124399e+00 -6.71413290e+00  6.66646896e+00  1.28381988e+00
   6.01649232e-01 -7.83571333e+00  1.20787403e+01 -7.41247954e+00
   1.76241427e+01 -4.33811073e+00  2.03958127e+01  6.79434057e+00
   1.29204228e+01  6.97056813e+00  1.53455369e+01  1.44247550e+01
   1.23993415e+01 -8.00832783e+00 -6.21109410e+00  1.86479458e+01
  -9.98947064e+00 -7.51494610e+00 -6.47810050e+00 -7.83906973e+00
  -5.06798353e+00  2.70020664e+00 -8.27996753e+00  2.50870857e+00
   3.88754578e+00  3.42435010e-01  1.64787133e+00  1.70752532e+01
   1.87568897e+00  8.55961038e+00 -5.34009234e+00  6.19212344e-01
  -3.50813741e+00  4.42900333e+00  1.15615517e+01  1.11572399e+00
   1.74891150e+00  5.59351020e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387697e+00
   5.83569036e+00 -1.96499799e+00 -8.10544382e+00  6.02059577e+00
   3.19552345e+01  1.31809707e+01  1.59281478e+01  3.10156446e+01
   1.88038075e+01  2.58382618e+01  2.62762086e+00  8.23966423e+00
   1.47725637e+01  1.40510995e+01  2.42428838e+00 -4.04582362e+00
   1.38458523e+01  7.30741421e-01 -3.96098188e+00  8.39054772e+00
  -9.69573366e+00 -1.09963383e+01  1.70450178e+00  1.58714193e+01
  -1.56484538e+00  1.77416292e+00  1.95693332e+01  2.69812012e+00
   1.08052293e+00 -9.70316709e-02  4.13363914e+00  7.69732730e+00
  -2.37516725e+00  9.95628089e+00 -4.73121944e+00 -8.49826885e+00
  -1.13706006e+00  2.63902724e+00  5.12793656e+00 -4.36502034e+00
   7.72387972e-01  2.41564086e+00  1.59137630e+00  8.46749223e+00
   1.43045984e+01  8.52828134e+00 -1.33647159e+00  9.59675838e+00
  -3.37656241e+00  3.50470822e+00  4.35619130e+00 -7.57429208e+00
  -5.11842048e+00  2.64902911e+00]]
******** n_components=68 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345758 0.00327867 0.00310999 0.0030599
 0.00290326 0.00271681 0.00264578 0.0025907  0.00251216 0.00242936
 0.00233416 0.00227524 0.00215586 0.00214586 0.00208843 0.0020193
 0.00197262 0.00190948 0.00187429 0.00183975 0.00182759 0.00178054
 0.0017359  0.00168681 0.00166943 0.00162672 0.00158484 0.00155982
 0.00154872 0.00152363 0.00149099 0.00145217 0.00144487 0.00140349
 0.00137808 0.00135623 0.00135659 0.0013478  0.00134114 0.0013065
 0.00128595 0.00127793 0.00124825 0.00122851 0.00121746 0.00119264
 0.00117801 0.00116351]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460656  22.63420233  19.14389638  16.97351607
  15.56368011  13.87362483  13.43690357  12.72377865  11.6390243
  10.82235166  10.37420503   9.86664625   9.67265748   8.99692953
   8.43764836   8.00104161   7.5894055    7.46715863   7.08490357
   6.62992668   6.45657964   6.32215503   6.13050715   5.9284367
   5.69613091   5.55232712   5.26100757   5.23661793   5.09646038
   4.92775568   4.8138554    4.65976966   4.57388639   4.48960234
   4.45992757   4.34511145   4.2361667    4.11637047   4.07395028
   3.96974633   3.86753204   3.80647986   3.7793911    3.71815257
   3.63850568   3.54376596   3.5259591    3.4249775    3.36297143
   3.30965533   3.31052459   3.28907168   3.27281545   3.18828768
   3.13814953   3.11856952   3.04613125   2.99797192   2.97099631
   2.91042756   2.87473039   2.83935424]
Total Variance Explained by all components: 0.3801622564505494
Shape of the reduced data:
(56318, 68)
Reduced data sample: [[133.00376387  31.16636469 123.42721126  15.4590261  -23.4470217
   -6.45674035  12.81569949  48.49125458   2.18302168   2.26869038
   32.78992237  42.51339924  28.1328793   32.19793767  15.94284915
   26.11436957   9.77150086  -6.71940298   6.67085653   1.29990316
    0.57888641  -7.82722865  12.11129346  -7.41192883  17.72385636
   -4.32222464  20.37302141   7.04264904  13.00849527   6.93157302
   15.81073169  14.54327869  10.81626036  -9.65554405  -7.23100314
   17.5416964  -10.64694398  -8.45343359  -6.72909171  -8.19036715
    1.0650004   -3.55529271  -6.62028684   2.2025157    2.48035287
    5.55421003   4.8551586   14.08410094  -1.56948062  12.54953333
    1.12863348   2.47813715  15.96875678   4.34725055   1.23446664
    4.57268219  -1.96189848   1.67435323  -5.8139415    5.52021519
    8.2714722   -5.65208598   1.6808753    5.10892235   2.15773093
    0.6256852   -0.9338832  -10.45773759]
 [121.15507988  56.95798552  85.95742171  -8.90387669   5.83568906
   -1.96500003  -8.10542793   6.02059964  31.95516845  13.18077455
   15.92818963  31.01539018  18.80296448  25.83974193   2.62754137
    8.23660068  14.7763301   14.04739504   2.4302134   -4.03626949
   13.84168941   0.71763898  -3.95850514   8.38433251  -9.64280477
  -11.00988161   1.7130614   16.04811962  -1.42645833   1.68209093
   19.63088693   2.75530826   1.31373846  -0.15570905   3.94148655
    7.33139875  -2.96553256   9.43097485  -4.49937295  -7.57500674
    4.58316878  -1.07067129   5.27063107  -4.15856023   2.3661998
    2.43382701  -0.15854762   9.00296063  12.92485048   9.90039807
    5.90829368   5.65788644  12.22639727   8.07935105  -8.23821709
    4.95213288   5.99040735  -0.72366237   0.75591105   3.30647793
    2.34162538  -2.21933856  -1.08989839   1.54447159  -0.15227023
  -10.67022032   0.6520758   -6.56765295]]
******** n_components=78 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311    0.0030599
 0.00290326 0.00271682 0.00264584 0.00259078 0.00251234 0.00242955
 0.00233449 0.00227561 0.00215692 0.00214623 0.00208872 0.00201949
 0.00197273 0.001911   0.00187613 0.00183961 0.0018286  0.00178414
 0.00173792 0.00168848 0.00167524 0.0016358  0.00159723 0.00156523
 0.00154903 0.00152515 0.00149415 0.00145813 0.00144964 0.00141686
 0.00140171 0.00139345 0.0013739  0.00136276 0.00134291 0.00134226
 0.00132092 0.00129061 0.00128534 0.0012712  0.00123129 0.00122567
 0.00121539 0.0012026  0.00119293 0.0011707  0.00116725 0.00114455
 0.00112776 0.0011268  0.00111426 0.00109743 0.00107814 0.00107274]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460651  22.63420233  19.14389634  16.97351609
  15.56368031  13.87362469  13.4369036   12.72377818  11.63902522
  10.82235049  10.37420419   9.86664602   9.67265663   8.99693718
   8.43765351   8.00105169   7.58941628   7.46716185   7.08491767
   6.62992928   6.45672478   6.32236301   6.1309388    5.92890673
   5.69692252   5.55323772   5.26358598   5.23750588   5.09717244
   4.92822293   4.81411613   4.66348114   4.57837539   4.48926492
   4.462385     4.35390308   4.24109547   4.12045239   4.08814872
   3.99189531   3.89778023   3.81968939   3.78015468   3.72185899
   3.6462218    3.55831469   3.53759506   3.45760364   3.42064788
   3.40048641   3.3527617    3.32559597   3.27715415   3.27555936
   3.22348463   3.14950798   3.13664474   3.10214704   3.00475287
   2.99103523   2.96594708   2.93473376   2.91115504   2.85690304
   2.84848638   2.79308675   2.75210285   2.74977199   2.71916706
   2.67809932   2.63102798   2.61783076]
Total Variance Explained by all components: 0.3919128250530247
Shape of the reduced data:
(56318, 78)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590267e+01
  -2.34470219e+01 -6.45674029e+00  1.28157044e+01  4.84913038e+01
   2.18302535e+00  2.26871303e+00  3.27900127e+01  4.25131529e+01
   2.81330148e+01  3.21984629e+01  1.59442416e+01  2.61125646e+01
   9.76997572e+00 -6.71292968e+00  6.66705953e+00  1.29743422e+00
   5.87908527e-01 -7.83739564e+00  1.21125078e+01 -7.41886764e+00
   1.77263435e+01 -4.38791784e+00  2.03556492e+01  6.99356935e+00
   1.29670524e+01  6.91418527e+00  1.57212461e+01  1.43397071e+01
   1.16746826e+01 -8.96467265e+00 -7.38509047e+00  1.70358725e+01
  -1.03836753e+01  8.52071586e+00 -6.52661721e+00 -8.26015183e+00
  -5.03111038e-01 -2.69745234e+00 -6.44515830e+00  1.94775552e+00
   2.96785139e+00  5.63188059e+00 -3.64588825e+00  1.17986682e+01
  -9.88994707e+00  9.95633024e+00  1.38937533e+00  1.37746187e+01
   1.03420521e+01  5.45035442e+00  3.26087360e+00 -2.54052223e+00
   8.28502966e-01 -1.64934265e+00  9.18147987e+00  2.53533857e+00
   1.83436851e+00 -1.45259208e+00 -5.80510995e-01 -7.43757163e-01
   8.37880798e+00 -1.14622026e+00  3.12044176e+00  3.74032926e-01
  -5.76775146e+00 -7.56045473e+00  9.78447293e+00  4.90530604e+00
  -3.50472914e-01 -8.81662130e+00 -8.10624973e+00  2.43327445e+00
   3.96817570e+00  4.87285778e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387661e+00
   5.83568951e+00 -1.96499983e+00 -8.10544285e+00  6.02062922e+00
   3.19552539e+01  1.31808064e+01  1.59284810e+01  3.10155061e+01
   1.88029928e+01  2.58397702e+01  2.62937022e+00  8.23569564e+00
   1.47756376e+01  1.40475816e+01  2.42798432e+00 -4.03874354e+00
   1.38391293e+01  7.11469617e-01 -3.92840983e+00  8.38746654e+00
  -9.63833757e+00 -1.10353616e+01  1.77178537e+00  1.60407903e+01
  -1.53776977e+00  1.73779376e+00  1.97361322e+01  2.67388030e+00
   1.39654939e+00 -9.97914014e-02  4.00732379e+00  7.22602102e+00
  -2.87117331e+00 -9.02345130e+00 -5.19244094e+00 -8.25250696e+00
   3.23706310e+00 -1.89942083e+00  5.65319802e+00 -5.26757538e+00
   1.55386364e+00  1.68048333e+00 -3.39129404e+00  1.16356258e+01
   4.98135217e+00  9.27520948e+00  8.48315961e+00  1.53127045e+01
   4.45224916e+00  6.25038023e+00  2.21803778e+00 -9.66022636e-01
  -1.00354197e+01 -1.00472770e+00 -8.34027398e-01 -2.15660086e+00
   3.03384166e+00 -6.91157523e+00 -3.64102465e+00  7.67848991e+00
   5.60308928e+00  9.66094926e-01 -4.61422692e+00 -3.18864082e+00
   1.52484735e+00  1.90740418e+00  4.16148082e+00 -2.32143338e+00
  -4.11922381e+00 -5.84036033e+00  1.29524018e+00  1.81304966e+00
  -1.19291889e+00 -3.19046341e+00]]
******** n_components=88 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290326 0.00271682 0.00264588 0.00259079 0.00251234 0.00242957
 0.00233459 0.00227565 0.00215698 0.00214635 0.00208872 0.00201995
 0.00197339 0.00191128 0.00187671 0.00184307 0.00182767 0.00178512
 0.00173918 0.00168906 0.00167691 0.00163592 0.00159815 0.00156639
 0.00155669 0.00153008 0.00149735 0.0014624  0.00145607 0.0014367
 0.00140309 0.00140124 0.00138648 0.00135955 0.00135009 0.0013447
 0.00133499 0.00129989 0.0012907  0.00128208 0.00124414 0.00124092
 0.00122589 0.00121868 0.00120045 0.00118186 0.00118274 0.00116699
 0.00116191 0.00114668 0.00113218 0.00112973 0.00111601 0.0011072
 0.00109184 0.00107954 0.0010654  0.00105477 0.00104534 0.00103727
 0.00102835 0.00102582 0.0010127  0.00101014]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460655  22.63420233  19.14389613  16.97351609
  15.56367971  13.87362355  13.43690362  12.72377808  11.63902496
  10.82235054  10.37420416   9.86664524   9.67265686   8.99693974
   8.43765202   8.00106172   7.58944722   7.46719544   7.08492053
   6.62994577   6.45682147   6.32237225   6.13093222   5.92895239
   5.69717585   5.55333365   5.26373929   5.23780069   5.09715797
   4.9293517    4.81572219   4.66415025   4.5797878    4.49770039
   4.46012618   4.35629007   4.24416613   4.12186078   4.09222371
   3.99217483   3.90001226   3.82251524   3.79884244   3.73389088
   3.65403782   3.56874144   3.55328542   3.50602718   3.42401534
   3.41948886   3.3834668    3.31775717   3.29465432   3.28150647
   3.25780635   3.17214936   3.14973342   3.1286898    3.03611489
   3.02824314   2.99158897   2.97397809   2.92948965   2.88412403
   2.88627651   2.84784593   2.83543659   2.79827213   2.7628905
   2.7569104    2.72342872   2.7019342    2.66445729   2.63444451
   2.59991846   2.57397928   2.55098508   2.5312844    2.50951384
   2.50333857   2.47133477   2.46506886]
Total Variance Explained by all components: 0.40277750234220333
Shape of the reduced data:
(56318, 88)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590266e+01
  -2.34470214e+01 -6.45674004e+00  1.28157058e+01  4.84912886e+01
   2.18299125e+00  2.26885360e+00  3.27898200e+01  4.25132952e+01
   2.81327383e+01  3.21980696e+01  1.59420854e+01  2.61143592e+01
   9.77186610e+00 -6.71932812e+00  6.67011620e+00  1.29765548e+00
   5.82982098e-01 -7.83825349e+00  1.21250563e+01 -7.41438848e+00
   1.77287123e+01 -4.35783685e+00  2.03671722e+01  6.96912314e+00
   1.29781018e+01  6.90685347e+00  1.56755773e+01  1.43082328e+01
   1.17297021e+01 -8.95007334e+00 -7.18824498e+00  1.74006075e+01
  -1.08273696e+01  8.80860788e+00 -6.28546260e+00 -8.16961832e+00
   1.82924842e+00  3.05576217e+00 -6.51275880e+00  2.01177956e+00
   2.72819434e+00  5.59212891e+00  2.71066111e+00  1.16420373e+01
   6.56869585e+00  1.43236161e+01 -1.75729717e+00  1.32629060e+01
   9.09901688e+00  3.09852488e+00  2.28953298e+00  4.82499752e+00
   2.01200383e+00  4.60915636e+00  5.69290873e+00  3.20282258e+00
  -4.04025227e+00  3.00396754e+00 -4.07415388e+00  2.40995855e+00
   8.59577646e+00  1.62576482e+00 -3.68352903e+00 -3.46467973e+00
  -9.24861211e+00  7.28150917e-01 -1.91941455e+00 -6.38298211e-01
  -1.70312630e+00  1.43010126e+01 -3.59225666e+00  6.81044892e+00
  -2.83022984e+00  5.65127737e+00  4.92951599e+00 -1.59541404e+00
   1.40457527e+00 -2.65838771e+00  6.47252637e+00  2.40473514e+00
   5.40890054e+00  5.11752416e+00  2.40625659e+00  1.89405828e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387647e+00
   5.83568905e+00 -1.96499938e+00 -8.10543676e+00  6.02060297e+00
   3.19552288e+01  1.31808479e+01  1.59283297e+01  3.10155986e+01
   1.88028267e+01  2.58402154e+01  2.62835287e+00  8.23563563e+00
   1.47747677e+01  1.40468709e+01  2.42815610e+00 -4.03491011e+00
   1.38387310e+01  7.15527761e-01 -3.92756666e+00  8.39606257e+00
  -9.62755301e+00 -1.10691356e+01  1.79837917e+00  1.60463724e+01
  -1.52544453e+00  1.71792950e+00  1.97056359e+01  2.74082185e+00
   1.47926423e+00 -1.57324119e-02  4.10462542e+00  7.08735990e+00
  -2.99235363e+00 -8.90795153e+00 -5.25407722e+00 -7.35262030e+00
   5.33690666e+00  1.98391335e+00  5.31596494e+00 -4.13619434e+00
   1.55949980e+00  2.70097324e+00  2.47551196e+00  9.85054617e+00
  -9.67941507e+00  9.93597382e+00 -6.80843800e+00  1.47661253e+01
   4.27253881e+00  6.87384635e+00 -1.30281449e+00  3.97993991e+00
  -8.72409187e+00 -6.29544938e-02 -1.13728620e-01  4.80491257e-02
   2.59818037e+00  6.07907396e+00 -1.92555331e-01  7.75986741e+00
   2.77484158e+00 -6.07760242e+00 -6.72337662e+00  1.00910226e-01
   2.71628316e+00  3.13010081e+00 -3.79657142e-01 -8.96463392e-01
  -4.90361304e+00  3.84090613e+00 -2.14792211e+00  1.13814818e+01
   3.29898831e-01 -5.30162753e-01  1.32228851e-01 -4.41752629e+00
   4.13138716e+00  4.68430606e+00 -3.49290479e+00 -1.71740355e+00
   2.82982760e+00  6.76004203e-01  3.64081663e+00 -1.00931616e+00]]
******** n_components=98 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271684 0.00264587 0.00259078 0.00251238 0.00242959
 0.00233457 0.00227573 0.00215701 0.00214647 0.0020889  0.00202025
 0.00197347 0.0019113  0.00187686 0.00184365 0.00182852 0.00178515
 0.00173996 0.00169067 0.00167726 0.0016374  0.00160085 0.00156581
 0.00155726 0.00153265 0.00150056 0.00146652 0.00145768 0.00143576
 0.00141066 0.00140393 0.00139031 0.00137041 0.00135676 0.0013516
 0.00132942 0.00130205 0.00129577 0.00128443 0.00125565 0.00124496
 0.00123854 0.00122756 0.00120598 0.00119281 0.00119058 0.00118158
 0.00116947 0.00115395 0.00114139 0.00113478 0.00113253 0.00111872
 0.00109394 0.00108645 0.00108241 0.00107142 0.00105757 0.00104442
 0.00104141 0.00103467 0.00103002 0.00101543 0.00100607 0.00100711
 0.00099963 0.00097883 0.00097341 0.00096371 0.00095674 0.00095451
 0.00094667 0.00093631]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460655  22.63420233  19.14389608  16.97351607
  15.56368012  13.87362394  13.43690363  12.7237784   11.6390251
  10.82235155  10.37420507   9.86664546   9.67265877   8.99693894
   8.43765582   8.00106686   7.5894459    7.46718202   7.08493583
   6.62999688   6.45679253   6.32235033   6.13103078   5.9290146
   5.69711292   5.55353686   5.2638248    5.23809733   5.09759492
   4.93009007   4.81592312   4.66420388   4.58016292   4.49910483
   4.46219147   4.35636838   4.24608437   4.12580106   4.09307697
   3.995794     3.90659415   3.82108499   3.80022732   3.74017673
   3.661852     3.57878419   3.55722027   3.50373621   3.44246907
   3.4260538    3.39282042   3.34426306   3.31094968   3.29836203
   3.2442172    3.17743409   3.16210925   3.13443136   3.06419706
   3.03810984   3.02244199   2.99564517   2.9429994    2.9108539
   2.90541125   2.88344565   2.8539023    2.81601229   2.78536561
   2.76924586   2.76374727   2.73004673   2.66956587   2.65130146
   2.64144947   2.6146117    2.58082624   2.54871961   2.54139335
   2.52492997   2.51359478   2.4779969    2.45514178   2.45767491
   2.43942473   2.38867023   2.37543431   2.35177858   2.33475201
   2.32930931   2.31019566   2.28489888]
Total Variance Explained by all components: 0.4128026938858566
Shape of the reduced data:
(56318, 98)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590265e+01
  -2.34470222e+01 -6.45674048e+00  1.28156979e+01  4.84912826e+01
   2.18301784e+00  2.26886536e+00  3.27899127e+01  4.25133299e+01
   2.81327905e+01  3.21984368e+01  1.59423152e+01  2.61145736e+01
   9.76944773e+00 -6.71805837e+00  6.66886362e+00  1.29677533e+00
   5.83502571e-01 -7.83559412e+00  1.21145516e+01 -7.41694650e+00
   1.77250099e+01 -4.36771330e+00  2.03464385e+01  6.97902421e+00
   1.29571015e+01  6.94753682e+00  1.56993892e+01  1.44045905e+01
   1.16952604e+01 -8.87728524e+00 -7.14940167e+00  1.72671872e+01
  -1.06650979e+01 -8.61512760e+00 -6.20993967e+00 -8.40733278e+00
   1.16423283e+00  2.68448516e+00 -7.20516981e+00  1.57878191e+00
   3.09221869e+00  5.52260347e+00 -2.62554092e+00  1.28363518e+01
   6.80016365e+00  1.25103152e+01  1.21391874e+00  1.49507871e+01
   6.79871241e+00  4.71498985e+00  3.63592167e+00  3.40985451e+00
   2.79283624e+00 -4.87978631e+00  5.35499645e+00 -3.81660570e+00
  -3.21011348e+00  2.06169172e+00  2.97779522e+00  2.60037628e+00
   3.49345823e+00 -5.94792019e-01  9.27237459e+00  2.54790531e+00
   1.31464042e+00 -5.85745583e+00  1.09477316e+01 -1.57992999e-01
  -7.20274544e+00  1.06349517e+01  2.00049927e+00  1.29766698e+01
  -6.49104370e+00 -2.31903136e-01 -3.36271938e+00  4.60502119e+00
   9.52167707e-01  2.82649141e+00  4.39742261e+00  8.19301027e-01
  -2.41695071e-01  4.51604788e+00  6.74154786e+00 -9.04671600e-01
   2.78714732e+00 -2.00026852e+00  3.95708693e+00 -3.97165651e+00
  -3.70959734e+00 -1.44814960e+00 -2.28116922e+00 -5.05927254e-01
  -1.85025715e+00 -1.04107779e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387659e+00
   5.83568894e+00 -1.96499977e+00 -8.10543466e+00  6.02060750e+00
   3.19552440e+01  1.31808340e+01  1.59283820e+01  3.10155069e+01
   1.88029106e+01  2.58404392e+01  2.62848275e+00  8.23559556e+00
   1.47737921e+01  1.40480854e+01  2.42921675e+00 -4.03642256e+00
   1.38382040e+01  7.14881617e-01 -3.93024781e+00  8.39190284e+00
  -9.63231500e+00 -1.10480238e+01  1.78004086e+00  1.60641765e+01
  -1.54402527e+00  1.72913747e+00  1.96928364e+01  2.74981370e+00
   1.45598938e+00 -5.91931582e-02  4.09055817e+00  7.18905323e+00
  -3.08507336e+00  8.92162116e+00 -5.04558640e+00 -7.68590707e+00
   4.87843206e+00  1.80461866e+00  5.10221842e+00 -4.63447061e+00
   1.42553917e+00  2.75074644e+00 -1.45009834e+00  1.05989497e+01
  -9.98839966e+00  9.92239163e+00  6.09275545e+00  1.55224930e+01
   7.55303120e-01  6.71616359e+00  3.69074137e+00  1.72041113e+00
  -7.24776377e+00  8.20527069e-01  3.22798489e+00  4.37996069e-01
   3.37962014e+00  6.07974060e+00 -5.43361244e-01  9.03912281e+00
   5.27582294e+00  6.89957495e+00  4.56540290e+00  4.12917348e-01
   4.26989726e+00 -2.54322998e+00 -3.85973296e+00  4.04186026e+00
  -7.62421570e-01  1.88288357e+00  7.57638154e-01  7.30808771e+00
  -1.57296175e+00 -1.57769161e+00  4.07560712e+00  1.73142650e+00
   1.75658183e+00  3.20713950e+00 -3.01886724e+00 -5.54619567e+00
   1.07150577e+00 -3.10153157e+00 -4.90577205e+00  2.63201194e+00
   2.64216189e+00  2.04368360e+00  5.32531055e+00  5.98395203e+00
  -6.38830544e+00  1.29034415e+00  1.47415969e+00 -5.14515947e+00
   2.79036165e+00  3.04542267e+00]]
******** n_components=108 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271684 0.00264588 0.00259079 0.00251238 0.00242959
 0.0023346  0.00227573 0.0021571  0.00214649 0.00208891 0.00202033
 0.00197363 0.0019116  0.00187713 0.00184359 0.00182882 0.00178564
 0.00174026 0.0016909  0.00167782 0.00163779 0.00160105 0.001569
 0.00155738 0.00153334 0.00150296 0.00146847 0.00146065 0.00143828
 0.00141224 0.0014069  0.00139113 0.00137201 0.00136138 0.00135531
 0.00134052 0.00130835 0.00130178 0.0012897  0.0012616  0.00125046
 0.00124126 0.0012364  0.00121986 0.0012137  0.00120196 0.0011873
 0.00117532 0.00116058 0.00114771 0.00114721 0.00113804 0.00112236
 0.00111935 0.00111324 0.00109285 0.00108954 0.0010746  0.00106642
 0.00105688 0.00105103 0.00104016 0.00103661 0.00102646 0.00101452
 0.00100923 0.00100505 0.00099556 0.00099097 0.00098023 0.00097421
 0.00097154 0.00096716 0.00096127 0.00094973 0.00094137 0.0009333
 0.00092764 0.00092798 0.00092083 0.00090716 0.00090368 0.00089446]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460654  22.63420233  19.14389612  16.97351608
  15.56367997  13.87362356  13.43690362  12.72377837  11.63902546
  10.82235183  10.37420569   9.86664478   9.67265801   8.99693917
   8.43765693   8.00106777   7.58945145   7.46718469   7.08493241
   6.62999311   6.45681334   6.32238847   6.13104858   5.92901325
   5.69720432   5.55352588   5.2640452    5.23813836   5.09762088
   4.93028535   4.81630924   4.66492972   4.58081445   4.4989735
   4.46292713   4.35754346   4.24680078   4.12634796   4.09443007
   3.99673874   3.90708502   3.8288829    3.80052096   3.7418613
   3.66772454   3.58356141   3.56446247   3.50986908   3.44632694
   3.43330571   3.39481541   3.34814609   3.32221546   3.30739736
   3.27132411   3.19281241   3.17677541   3.14728775   3.07871096
   3.05154466   3.02909067   3.01723063   2.97686307   2.96182221
   2.93317139   2.89739615   2.86816844   2.83219123   2.80078332
   2.79956381   2.77718193   2.73893126   2.73158381   2.71666884
   2.66692542   2.65884015   2.6223728    2.60242215   2.57914175
   2.56485218   2.53832281   2.52967708   2.50489926   2.4757665
   2.46286448   2.4526504    2.42948573   2.41830249   2.39208086
   2.37740001   2.37088143   2.36019591   2.34582717   2.31766568
   2.29724409   2.27754882   2.26375655   2.26457129   2.24712139
   2.21377598   2.20527095   2.18277008]
Total Variance Explained by all components: 0.422641432626354
Shape of the reduced data:
(56318, 108)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590265e+01
  -2.34470219e+01 -6.45674014e+00  1.28157041e+01  4.84912864e+01
   2.18299980e+00  2.26877792e+00  3.27898774e+01  4.25132593e+01
   2.81328885e+01  3.21980142e+01  1.59421893e+01  2.61138611e+01
   9.76911861e+00 -6.71669276e+00  6.66901407e+00  1.29772692e+00
   5.85521944e-01 -7.83515734e+00  1.21179720e+01 -7.41355481e+00
   1.77278400e+01 -4.34881231e+00  2.03608487e+01  6.98646164e+00
   1.29641272e+01  6.92288682e+00  1.57071066e+01  1.43356884e+01
   1.16913918e+01 -8.81023215e+00 -7.20199227e+00  1.73017182e+01
  -1.05749286e+01  8.69369015e+00 -6.14591075e+00 -8.41755036e+00
   1.00274803e+00 -2.79077658e+00 -7.06057225e+00  1.88142717e+00
   3.06854215e+00  5.42060953e+00 -3.04382336e+00  1.09516315e+01
   7.86766109e+00  1.32423426e+01 -1.17841287e+00  1.47279612e+01
   7.25588657e+00  4.64469517e+00  3.76671576e+00 -1.73076590e+00
   2.33999465e+00 -3.55424691e+00  3.83342072e+00  5.99063081e+00
  -4.00408482e+00  3.82716070e+00  2.50203456e+00  1.32719069e+00
  -3.12889026e+00 -3.01913902e+00 -7.81818506e+00 -3.90505998e+00
  -9.77260919e+00 -6.93445841e+00 -2.94157759e+00  3.17467824e+00
   3.97917095e+00  2.77570529e+00  3.01744001e+00  1.64077575e+01
  -8.87271116e+00 -2.62054482e+00  2.30407370e+00  4.74453433e+00
  -1.41269625e+00 -2.79216583e+00  6.37523092e+00 -9.70748009e-01
   3.38644900e+00  8.53306476e-01 -8.64297221e-01 -2.74005570e-01
   1.15670480e+01  1.44913615e+00  1.52807514e+00  2.27564985e+00
   4.79446939e+00  1.64901046e+00  3.08146387e-01 -3.12917533e+00
   6.85386496e-01 -4.47581543e+00  1.20036590e+00  5.11643270e+00
   2.77614158e+00  3.83486856e-01 -1.03350597e+00 -5.30811067e-01
   3.90524680e+00 -1.31223755e+00  7.49673357e+00 -2.95417320e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387657e+00
   5.83568896e+00 -1.96499921e+00 -8.10543190e+00  6.02060397e+00
   3.19552464e+01  1.31808333e+01  1.59283713e+01  3.10155646e+01
   1.88029015e+01  2.58401145e+01  2.62814205e+00  8.23575610e+00
   1.47743264e+01  1.40476584e+01  2.42927090e+00 -4.03623915e+00
   1.38368600e+01  7.12365469e-01 -3.93053992e+00  8.38815351e+00
  -9.64007109e+00 -1.10395435e+01  1.78408086e+00  1.60459640e+01
  -1.54082968e+00  1.72591900e+00  1.96848032e+01  2.74403421e+00
   1.40063789e+00 -6.35011362e-02  4.10513359e+00  7.06231556e+00
  -3.03910154e+00 -8.79802659e+00 -5.32460921e+00 -7.71104356e+00
   4.85892369e+00 -1.90913808e+00  5.10640161e+00 -4.51969578e+00
   1.21257258e+00  2.71610198e+00 -1.81451486e+00  1.08749210e+01
  -9.35857565e+00  1.03620939e+01 -5.96524291e+00  1.48942356e+01
   2.13224363e+00  7.83814758e+00  4.51676839e+00 -1.48741979e+00
  -8.38739343e+00  7.92962496e-01  1.58995184e+00  2.97438183e-01
   4.21035987e+00  6.39792705e+00 -2.64874214e+00  6.80454360e+00
  -4.00664618e+00  2.50819358e+00 -6.26995142e+00 -1.07052060e+00
   7.27909468e-01 -6.63134935e-02 -3.92831692e+00 -3.58340902e+00
   1.53464582e+00 -7.37345149e-01  1.08945592e+00  8.58031915e+00
   1.56167101e+00  3.46387937e+00  1.31163848e+00  2.65754303e+00
  -4.46342811e-01 -6.65681830e+00 -3.09667208e+00  4.34191688e+00
   2.64449813e+00  7.56880801e-01  2.61336734e-01 -7.11178575e-01
   1.19944443e-01 -4.63815258e+00  6.54110512e-01  3.42657908e+00
   1.86989870e-01  2.23466382e+00 -1.71003428e+00 -2.11933988e+00
   4.02580740e+00 -2.80462679e+00  6.58386860e-01 -2.31639301e+00
  -6.88144347e+00  1.56929194e+00 -7.92498974e-01  2.98531553e-01
  -1.40331206e+00 -2.88055610e+00  3.56640076e+00 -2.07260024e+00]]
******** n_components=118 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271685 0.00264588 0.00259079 0.00251239 0.00242961
 0.00233461 0.00227577 0.00215714 0.0021465  0.00208895 0.00202038
 0.00197378 0.00191173 0.00187722 0.00184379 0.00182895 0.00178563
 0.00174023 0.00169143 0.00167805 0.00163801 0.00160134 0.00156888
 0.0015578  0.00153365 0.00150325 0.00146894 0.00146148 0.00143994
 0.00141402 0.00140793 0.00139187 0.00137342 0.00136212 0.00135571
 0.00134182 0.00130961 0.00130304 0.00129116 0.00126156 0.00125637
 0.00124488 0.00123039 0.00122242 0.00121478 0.0011991  0.00118707
 0.0011856  0.00116392 0.00115897 0.00114489 0.00113686 0.00113052
 0.00112848 0.00111993 0.00109964 0.00108828 0.00108241 0.00107838
 0.00106669 0.00105812 0.00105317 0.00104731 0.00104163 0.00103648
 0.00103005 0.00101276 0.00101136 0.00100756 0.00099559 0.00099302
 0.00098469 0.000975   0.0009703  0.00097072 0.00096064 0.00095237
 0.00095035 0.00094241 0.00092696 0.00092166 0.00091142 0.00090034
 0.00090091 0.0008993  0.00089233 0.00088804 0.00087818 0.00087755
 0.0008736  0.00086624 0.00086409 0.0008557 ]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460654  22.63420233  19.14389615  16.97351609
  15.56367997  13.87362396  13.43690363  12.72377825  11.63902516
  10.82235154  10.37420573   9.8666447    9.67265813   8.99693957
   8.43765573   8.0010657    7.58945231   7.46719349   7.08493835
   6.63001611   6.45681386   6.32239132   6.13106662   5.92905686
   5.6972266    5.55363458   5.26412761   5.23818115   5.09773191
   4.93039198   4.81667379   4.66524737   4.58104771   4.49944817
   4.46323394   4.35753871   4.24674377   4.12765979   4.09500332
   3.997287     3.90780924   3.82859101   3.80155074   3.74262066
   3.66842116   3.58469416   3.56648588   3.51392711   3.45067956
   3.43581973   3.39661564   3.35159685   3.32401757   3.30839043
   3.27449393   3.19587736   3.17984224   3.15086445   3.07863285
   3.06596691   3.03792742   3.00255016   2.98310538   2.96447148
   2.92620617   2.89683266   2.89326257   2.84034194   2.8282652
   2.79390053   2.77431791   2.75884381   2.75386039   2.73300916
   2.68349007   2.65577356   2.64143116   2.63161579   2.60308173
   2.5821564    2.57009487   2.5557786    2.54192682   2.52936279
   2.51366064   2.47147208   2.46804536   2.45877331   2.42957824
   2.42329837   2.40296619   2.3793323    2.36786191   2.36886746
   2.34427652   2.32410558   2.31915793   2.29979354   2.2620805
   2.24916641   2.22416359   2.19711652   2.19852153   2.19457763
   2.17758648   2.16711552   2.143048     2.14150905   2.13188151
   2.11390906   2.10867191   2.08819037]
Total Variance Explained by all components: 0.4318631193029966
Shape of the reduced data:
(56318, 118)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590266e+01
  -2.34470217e+01 -6.45674035e+00  1.28157029e+01  4.84912843e+01
   2.18300067e+00  2.26882456e+00  3.27898950e+01  4.25132254e+01
   2.81328138e+01  3.21980830e+01  1.59419939e+01  2.61142309e+01
   9.76962641e+00 -6.71782177e+00  6.66853306e+00  1.29662661e+00
   5.86129759e-01 -7.83311983e+00  1.21234282e+01 -7.41916327e+00
   1.77308416e+01 -4.35154769e+00  2.03576545e+01  6.98861510e+00
   1.29710735e+01  6.95339230e+00  1.57119615e+01  1.44067178e+01
   1.17915831e+01 -8.76476307e+00 -7.17742748e+00  1.72923024e+01
  -1.05734674e+01  8.62699810e+00 -6.19316695e+00 -8.44733906e+00
   1.11736580e+00  2.76593286e+00 -7.15912376e+00  1.98042678e+00
   2.87613308e+00  5.23246347e+00 -2.59139534e+00  1.21169174e+01
   7.24184138e+00  1.34113001e+01  1.10718223e+00  1.42523850e+01
   6.84768212e+00  5.61528742e+00  3.27392604e+00  2.07686630e+00
   2.77045540e+00 -3.20725664e+00  4.78832125e+00 -4.86514662e+00
  -4.07578412e+00  2.11402791e+00  1.49433523e+00  1.09873372e+00
  -5.30248374e+00 -1.33471434e-02 -8.48177509e+00  1.95783193e+00
   1.11058577e+01 -5.95348915e+00 -3.72471821e+00  1.50175334e+00
   2.75914896e+00  5.04292185e+00  1.54106700e+01  3.98488162e+00
   5.01706155e-02  3.65542559e+00  5.01893723e+00 -1.18933794e+00
   2.27790788e+00  8.54062305e+00 -7.49178230e+00 -8.52835766e-01
   3.76923522e+00  5.70233232e+00 -1.80603983e+00 -2.88074865e+00
  -4.99463779e+00  1.55146316e+00  2.28092955e+00 -3.15522854e+00
  -4.92663505e-01 -4.14421144e-01 -6.13554851e+00  1.53123040e+00
  -7.84620966e-01  2.32652083e-01 -1.01623832e+00 -3.90778860e+00
  -1.62060269e-01  8.53250687e-01 -1.49071437e-01  4.19103611e+00
   1.76159222e+00  1.48059718e+00  1.91055973e+00  4.82674589e+00
   1.93610419e+00 -3.82866953e+00  1.31914911e-01 -4.26723218e+00
   4.25185152e+00 -5.11031519e+00  5.39254886e+00  2.30389550e+00
  -2.01318661e+00  3.23390947e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387652e+00
   5.83568903e+00 -1.96499966e+00 -8.10543359e+00  6.02060854e+00
   3.19552428e+01  1.31808239e+01  1.59283782e+01  3.10154272e+01
   1.88029127e+01  2.58401742e+01  2.62823376e+00  8.23526127e+00
   1.47736121e+01  1.40478764e+01  2.42969136e+00 -4.03637690e+00
   1.38388643e+01  7.16410669e-01 -3.92478695e+00  8.39199139e+00
  -9.63685554e+00 -1.10469640e+01  1.77887550e+00  1.60601000e+01
  -1.53062787e+00  1.74145639e+00  1.97100227e+01  2.77303501e+00
   1.41959807e+00 -4.63422755e-02  4.04703402e+00  7.13970543e+00
  -3.01588991e+00 -8.93694713e+00 -5.17845330e+00 -7.72626898e+00
   4.91173558e+00  1.96453763e+00  5.04844609e+00 -4.56241350e+00
   1.23450622e+00  2.54142583e+00 -1.59354909e+00  1.08262417e+01
  -9.87155924e+00  1.01784343e+01  6.05217145e+00  1.45736285e+01
   1.08091436e+00  9.05125651e+00  3.33874392e+00  1.41912518e+00
  -7.30118164e+00  6.18464937e-01  1.83018505e+00 -3.49671523e-02
   3.25626340e+00  5.95198587e+00 -2.71790286e+00  8.30430415e+00
  -1.51161731e+00 -5.01144626e+00 -6.66668681e+00  2.06074238e+00
   2.00854521e-01  1.52330954e+00 -4.70610601e+00 -7.50649682e-01
  -4.35456607e+00  3.94120244e+00  4.10668072e+00  2.94935660e+00
  -2.73459845e+00  6.31392584e+00 -2.42962917e+00 -8.35469434e-01
   1.96645430e+00  3.84197959e+00 -3.01023616e+00  6.03440573e+00
  -1.52715223e+00 -4.43906252e+00 -4.12426967e+00  7.73597819e-01
   5.09922200e+00  1.98437591e+00  8.84536683e-01  3.02304613e+00
   3.29809036e+00 -6.09133972e+00 -2.86443891e+00  5.18321085e+00
   2.59769582e+00 -3.42282320e+00 -3.78104054e+00 -7.59444290e-01
   2.71983866e-02 -1.21731684e+00  3.12306348e+00  2.27768743e+00
  -4.29259060e-01  2.31585876e+00  1.86535032e+00  1.42165673e+00
  -1.59171560e+00 -2.58450336e+00  3.62454907e+00 -2.08500146e+00
   5.10385236e-01  2.37909524e+00  3.32376343e+00 -5.32475329e-01
   1.09409269e+00  5.00862293e-01]]
******** n_components=128 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271685 0.00264588 0.00259079 0.00251239 0.00242961
 0.00233462 0.00227577 0.00215715 0.00214652 0.00208894 0.0020204
 0.00197379 0.00191174 0.00187723 0.00184386 0.00182894 0.00178568
 0.00174035 0.0016913  0.00167818 0.00163804 0.00160223 0.00156915
 0.0015586  0.00153372 0.00150323 0.00146947 0.0014617  0.00143975
 0.00141352 0.00140829 0.00139268 0.00137405 0.00136291 0.00135512
 0.00134179 0.00131026 0.00130395 0.00129505 0.0012586  0.00125704
 0.00124736 0.00124096 0.00122507 0.00121703 0.0012035  0.00118921
 0.00118753 0.00116436 0.00115874 0.0011507  0.00114294 0.00113362
 0.00112702 0.00112165 0.00110154 0.00109646 0.00108958 0.00107501
 0.0010747  0.00105908 0.00105297 0.00105084 0.001044   0.00104074
 0.00103344 0.00102147 0.00101757 0.00101313 0.0010086  0.00099358
 0.0009908  0.0009785  0.00097982 0.00097184 0.00096717 0.00096153
 0.00095386 0.00095367 0.00094409 0.00093642 0.00092623 0.00092309
 0.00091859 0.00091333 0.00090997 0.00089948 0.00089577 0.00089083
 0.0008861  0.00087987 0.00087711 0.00087182 0.00086828 0.00086218
 0.00085685 0.00084936 0.00084754 0.00083378 0.00083362 0.00083288
 0.00082376 0.00082384]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460655  22.63420233  19.14389617  16.97351607
  15.56367996  13.87362371  13.43690363  12.72377829  11.63902524
  10.82235201  10.37420548   9.86664416   9.67265782   8.99693989
   8.4376566    8.00106831   7.58945294   7.46718868   7.08493562
   6.63001238   6.45682463   6.32238988   6.13107141   5.92905407
   5.69723275   5.55362692   5.26414912   5.23820899   5.09770349
   4.93044066   4.81670901   4.6652819    4.58106587   4.49961776
   4.46320887   4.35765523   4.24702419   4.12732971   4.09532291
   3.9973575    3.90998148   3.82923935   3.80349289   3.74279044
   3.66838852   3.585986     3.56702179   3.51346026   3.44944811
   3.43668516   3.3985882    3.35312833   3.32596251   3.30694281
   3.27439993   3.19746304   3.18207214   3.16035242   3.07140348
   3.0675889    3.043979     3.02834911   2.98956819   2.96994717
   2.93693287   2.90205466   2.89796539   2.84141428   2.82771563
   2.80809429   2.78915417   2.76641007   2.75030324   2.73719793
   2.68811436   2.67573504   2.65893156   2.62338602   2.62262018
   2.58451503   2.56959949   2.56440881   2.54771375   2.53975771
   2.52192319   2.4927198    2.48321249   2.47237959   2.46131156
   2.42467289   2.41786997   2.38785919   2.39107329   2.37161723
   2.36022381   2.34645215   2.32772289   2.32728187   2.30388938
   2.28517917   2.26029864   2.2526542    2.24166626   2.22883209
   2.22063639   2.19503797   2.18596567   2.17391716   2.16236672
   2.14718252   2.14044973   2.1275353    2.11889245   2.10401598
   2.09100822   2.0727187    2.06827651   2.03470801   2.03431575
   2.03250511   2.01024734   2.0104385 ]
Total Variance Explained by all components: 0.4406819452826911
Shape of the reduced data:
(56318, 128)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590266e+01
  -2.34470219e+01 -6.45674028e+00  1.28157034e+01  4.84912811e+01
   2.18298958e+00  2.26880621e+00  3.27898751e+01  4.25132413e+01
   2.81329301e+01  3.21982234e+01  1.59424536e+01  2.61140323e+01
   9.76963729e+00 -6.71702592e+00  6.66833703e+00  1.29615812e+00
   5.85739396e-01 -7.83354066e+00  1.21214566e+01 -7.41504820e+00
   1.77301943e+01 -4.35216285e+00  2.03525233e+01  6.98930083e+00
   1.29639416e+01  6.95707342e+00  1.56759485e+01  1.44085248e+01
   1.17676424e+01 -8.78707827e+00 -7.16502286e+00  1.73127516e+01
  -1.06101523e+01  8.57627914e+00 -6.17010503e+00 -8.38926908e+00
   1.22170388e+00  2.73484589e+00 -7.19538523e+00  2.02739651e+00
   3.11170798e+00  5.10849766e+00 -2.55387926e+00  1.18313923e+01
   6.90735214e+00  1.37115461e+01  4.42644936e-01  1.46812593e+01
   6.48925818e+00  5.93669499e+00  3.79731036e+00  1.16549433e+00
   3.22190214e+00 -2.95466275e+00  4.51450643e+00  4.63638680e+00
  -3.85896941e+00  1.60432330e+00 -2.61842922e+00  1.64800692e+00
  -3.06310181e+00 -2.43864316e+00 -6.00960928e+00  6.03779272e+00
  -1.05782972e+01 -8.13273219e+00 -3.18261263e+00  4.59592034e+00
   1.16563861e+00 -4.26389128e+00  1.27094301e+01  7.53584860e+00
  -2.27241132e+00  5.71293831e+00 -8.67512648e-01 -3.83082722e+00
   7.03388412e+00 -3.78405100e+00  3.57136939e+00  9.04910802e+00
   5.38475692e+00 -3.85216067e+00  1.19114135e+00 -2.44895326e+00
  -4.71670058e+00 -3.42787315e+00  1.20814756e+00  1.67031578e+00
   3.67651266e+00 -4.53446386e+00  2.10954444e+00 -4.66824348e+00
  -3.46967725e+00  3.53093883e-01  1.91303710e+00  3.88005811e+00
   1.99875352e+00  5.36125085e-01 -1.13964667e+00  2.42423120e+00
   5.01920330e+00 -8.70203517e-01 -2.15357447e+00 -6.62252015e+00
  -1.87926874e-01 -3.70186936e+00  4.91322455e+00 -5.15735705e+00
   1.88809587e+00 -3.67789192e-01 -4.79776685e+00  1.44633275e+00
   1.36058842e+00 -2.09097094e+00 -3.81197379e+00  3.90179192e+00
  -5.89608308e+00 -4.09838164e+00 -4.17491070e+00 -1.88326678e+00
   5.24118553e+00  2.71168689e+00  6.53732203e-01 -7.99466197e-01]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387651e+00
   5.83568903e+00 -1.96499952e+00 -8.10543333e+00  6.02061095e+00
   3.19552480e+01  1.31808171e+01  1.59283829e+01  3.10155021e+01
   1.88029360e+01  2.58403240e+01  2.62853287e+00  8.23549157e+00
   1.47741368e+01  1.40477119e+01  2.42880440e+00 -4.03658770e+00
   1.38388099e+01  7.13631918e-01 -3.92983335e+00  8.39131178e+00
  -9.63542764e+00 -1.10466993e+01  1.78706152e+00  1.60607819e+01
  -1.53157865e+00  1.73953319e+00  1.96879270e+01  2.78259764e+00
   1.42685769e+00 -3.66656405e-02  4.05493461e+00  7.17768837e+00
  -3.01621742e+00 -8.92898575e+00 -5.12997257e+00 -7.66603217e+00
   5.11103899e+00  1.88402017e+00  5.00820553e+00 -4.56679552e+00
   1.41012062e+00  2.54421578e+00 -1.37494546e+00  1.03857273e+01
  -1.00647160e+01  1.04605744e+01  5.85518295e+00  1.48401222e+01
   1.24984708e+00  8.88604272e+00  3.80846018e+00  7.23473603e-01
  -7.67709690e+00  4.89647106e-02  1.79933950e+00  8.88635474e-02
   3.37461789e+00  7.17431872e+00 -1.89770201e+00  7.60567814e+00
   2.07733899e+00 -6.62384501e+00 -4.66462161e+00  3.61143785e+00
   1.39071518e-01  6.20321344e-01 -3.69962405e+00 -1.89041280e+00
  -1.94428892e+00 -4.54686658e-02  6.05557267e+00  5.61405165e+00
   2.18008828e+00 -6.71512232e-01 -7.18238360e-01 -4.97452373e+00
   4.09302925e+00 -4.51188421e+00  1.27669601e+00 -1.42457275e+00
  -2.63466816e+00 -1.12804311e+00 -5.19474073e+00 -2.71797039e+00
   7.67634157e-01  1.48209576e+00 -3.05209200e+00  2.65778710e+00
  -1.44079108e+00  2.94313286e+00 -1.13287828e+00  4.23374595e+00
   8.97683534e-01 -2.70197214e+00  4.33483064e+00  3.08018548e+00
  -2.64306307e+00 -4.03057597e+00  4.64447125e-01  6.53081662e+00
   1.37858271e+00 -1.94665713e+00 -7.83043102e-01 -8.45069264e+00
  -4.49058883e+00  4.85272778e+00  4.24608542e-03 -2.43433563e+00
  -1.29521418e+00  4.78731668e-01  2.30975503e+00  1.60075029e+00
   2.41509841e+00  2.60933029e-01 -1.50303943e+00  4.08292280e+00
  -2.10696375e+00  3.50047695e-01  1.30582359e+00  5.61664720e+00
   4.99361090e+00  2.66198574e+00 -2.41830658e+00  1.11205787e+00]]
******** n_components=138 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271685 0.00264588 0.0025908  0.0025124  0.00242962
 0.00233462 0.00227578 0.00215715 0.00214653 0.00208896 0.00202041
 0.0019738  0.00191178 0.00187727 0.00184384 0.001829   0.00178587
 0.00174053 0.00169152 0.00167825 0.00163807 0.00160209 0.00156938
 0.0015587  0.00153396 0.00150395 0.00146964 0.00146196 0.00144035
 0.00141493 0.00140797 0.00139346 0.00137465 0.00136337 0.00135648
 0.00134137 0.0013114  0.00130669 0.00129696 0.00126346 0.00125765
 0.00124823 0.00124145 0.00122602 0.0012208  0.00120721 0.00119097
 0.00118903 0.00116521 0.00116237 0.00115023 0.0011458  0.00113622
 0.00113206 0.00112812 0.00110509 0.00109778 0.00109242 0.00108273
 0.00107779 0.00106559 0.00106124 0.00105317 0.00105196 0.00104272
 0.00103401 0.00102966 0.0010264  0.00100956 0.00101023 0.00100496
 0.00099568 0.00099397 0.00098315 0.00097748 0.00097019 0.00096788
 0.00096502 0.00096485 0.0009524  0.00094185 0.00093835 0.00093314
 0.00093136 0.00092631 0.00091763 0.00090876 0.00090343 0.00090029
 0.00089966 0.00089087 0.00088818 0.00088467 0.00088002 0.00087758
 0.00087554 0.00087339 0.00086333 0.00085867 0.00085648 0.00084981
 0.00084841 0.00084404 0.00083611 0.00082881 0.00082215 0.00081941
 0.00081273 0.00080987 0.00080924 0.00079539 0.0007944  0.00079209]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460654  22.63420233  19.14389613  16.97351607
  15.56368002  13.87362375  13.43690363  12.72377828  11.6390253
  10.82235183  10.37420577   9.86664451   9.67265825   8.99693964
   8.43765708   8.00106781   7.5894525    7.46718797   7.08493899
   6.63001648   6.45682989   6.32239854   6.13107923   5.92906473
   5.69724729   5.55364713   5.26416875   5.23823456   5.09775669
   4.93047497   4.81672842   4.66538264   4.58115321   4.49958352
   4.46337317   4.35811981   4.24747477   4.12786196   4.09549291
   3.99743225   3.90963186   3.82981727   3.80375449   3.74335894
   3.6701314    3.58641027   3.56765722   3.51492967   3.4528964
   3.43590074   3.40050335   3.35459181   3.32707896   3.31026824
   3.2733949    3.20026127   3.18875033   3.1650004    3.08327219
   3.06907342   3.04608827   3.02955051   2.99189188   2.97915995
   2.94598051   2.90635926   2.90161706   2.84349051   2.83656407
   2.80694323   2.79614056   2.77275377   2.76260395   2.75298728
   2.69679184   2.67895611   2.6658721    2.64222185   2.63016736
   2.60039562   2.58976778   2.57007657   2.56713096   2.54457404
   2.5233333    2.51270985   2.50474331   2.46367101   2.46529684
   2.45242603   2.4297961    2.42561255   2.39920677   2.38537954
   2.36758397   2.36195763   2.35496156   2.35455447   2.32417031
   2.29842282   2.28988146   2.27716898   2.27282747   2.26049848
   2.23932843   2.21766723   2.20466882   2.19699839   2.19547886
   2.17401637   2.16745295   2.15889233   2.14753996   2.14157821
   2.13661793   2.13136396   2.1068039    2.09544806   2.09010318
   2.07381665   2.07040628   2.05974132   2.04039647   2.0225804
   2.0063137    1.99962707   1.98333688   1.97635424   1.97482097
   1.94100662   1.93859461   1.93297037]
Total Variance Explained by all components: 0.4493274172605292
Shape of the reduced data:
(56318, 138)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590266e+01
  -2.34470218e+01 -6.45674034e+00  1.28157032e+01  4.84912845e+01
   2.18300399e+00  2.26882198e+00  3.27899140e+01  4.25132782e+01
   2.81328065e+01  3.21982297e+01  1.59423057e+01  2.61139920e+01
   9.76980337e+00 -6.71678218e+00  6.66795986e+00  1.29625312e+00
   5.85094481e-01 -7.83572285e+00  1.21217533e+01 -7.41261047e+00
   1.77337915e+01 -4.34966275e+00  2.03614902e+01  6.98690835e+00
   1.29742687e+01  6.94943028e+00  1.56935303e+01  1.43921530e+01
   1.17680259e+01 -8.78712938e+00 -7.19922247e+00  1.73478164e+01
  -1.05946832e+01 -8.54091606e+00 -6.19568493e+00 -8.39035485e+00
   1.09834260e+00  2.73099209e+00 -7.08490979e+00  1.97293447e+00
   3.03181488e+00  5.10288429e+00 -2.68271477e+00  1.19430594e+01
   7.03484525e+00  1.31804333e+01  6.23829950e-01  1.49020570e+01
   6.49696527e+00  5.79646007e+00  3.90130898e+00 -1.89186212e+00
   2.95159316e+00 -2.63211967e+00  3.36914841e+00  6.14769947e+00
  -4.06608461e+00  3.09035166e+00  1.82369777e+00  2.33979033e+00
  -4.17834122e+00 -1.32151747e+00 -6.55560608e+00 -2.66257534e+00
  -1.23181641e+01  5.43621806e+00 -2.94833047e+00  3.33716515e+00
  -2.20009863e+00 -1.12276994e+01  1.13487494e+01  6.87080952e+00
   2.43594023e+00  5.32944231e+00  2.59458906e+00  3.48478463e+00
  -1.27146379e+00  7.91563974e+00 -7.55535980e-01 -1.44289916e+00
  -7.51121560e+00  1.02557700e+00 -5.15064258e+00 -1.39044334e+00
  -3.50127285e+00  1.68253420e+00  3.34304976e+00  3.15848396e-01
   3.46842683e+00 -2.96503348e+00 -8.95545172e-01  2.03605112e+00
  -3.55787035e-01  9.88889806e-01 -3.64872840e+00 -3.63343177e+00
  -6.82587483e-01 -3.75820682e+00  1.50367506e-01 -7.02512152e-01
  -2.68115486e+00  4.75271383e+00  6.33214599e+00 -2.74776585e+00
  -7.19864226e-01  6.76009254e-01 -1.12035816e+00  9.97585408e+00
   2.67661884e+00 -2.24864629e+00  4.67249835e+00  2.35726820e+00
   1.04291857e+00 -8.34675139e-01 -4.33319242e+00  3.79312951e+00
   5.25389374e-01 -2.16004292e-01  1.16732373e+00 -1.00910273e+00
   1.38212324e+00  1.53984228e+00  4.60134763e-01  1.55837384e+00
   5.30945724e+00  3.83208812e+00 -3.33133829e-02  7.04728397e+00
  -2.17519838e+00  6.57631899e-02  3.34911554e+00 -4.77366374e+00
   2.22955882e+00 -2.41131079e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387648e+00
   5.83568900e+00 -1.96499949e+00 -8.10543384e+00  6.02060928e+00
   3.19552444e+01  1.31808254e+01  1.59283873e+01  3.10154651e+01
   1.88027778e+01  2.58403124e+01  2.62826215e+00  8.23542693e+00
   1.47740144e+01  1.40477585e+01  2.42904579e+00 -4.03674602e+00
   1.38374695e+01  7.13853736e-01 -3.92898093e+00  8.39619411e+00
  -9.63460884e+00 -1.10448007e+01  1.78083534e+00  1.60590729e+01
  -1.53036151e+00  1.74104411e+00  1.96925477e+01  2.76890768e+00
   1.40218849e+00 -4.68297819e-02  4.04739331e+00  7.16478575e+00
  -2.97877322e+00  8.97028059e+00 -5.15740602e+00 -7.70992771e+00
   4.99441323e+00  1.86628770e+00  5.00946628e+00 -4.68720357e+00
   1.36217500e+00  2.45756563e+00 -1.61805194e+00  1.05122578e+01
  -9.75313676e+00  1.04049566e+01  5.81463086e+00  1.47587050e+01
   1.03745961e+00  8.72922199e+00  4.28917067e+00 -9.93370087e-01
  -7.44139341e+00  6.58413046e-01  2.33134417e+00 -7.91869649e-02
   2.87271847e+00  5.38412826e+00 -2.64958461e+00  8.32867779e+00
  -2.29284946e+00  5.59422767e+00 -6.72056205e+00 -1.49899758e+00
   1.41961380e+00  5.34642585e-03 -3.72711202e+00 -2.91780176e+00
   7.91193395e-01 -2.40421596e+00  5.18088128e+00  2.48257798e+00
   7.06513095e+00 -1.07903920e+00  1.66972042e+00  3.64407956e+00
   1.79319116e+00  6.23103749e+00 -4.14485452e-01  1.49018102e+00
   5.25858374e+00  9.50102583e-03  6.90986153e-01 -3.36395997e+00
   4.37609792e+00 -2.54459424e+00  2.66549863e+00  3.42770913e+00
   2.88629115e+00  4.21109210e+00  4.51663476e-02 -3.08647998e-01
  -1.65368269e-02 -5.37997134e-01 -1.96106627e+00  5.04807412e+00
   2.42097076e+00  8.58636567e-01 -3.68470964e+00 -6.94970009e+00
   2.34621605e+00  3.67986675e-01  6.26526097e+00 -2.11244220e+00
   9.67146969e-01  4.00301472e+00  6.03792199e-01 -4.76592998e-01
   1.77299683e+00  4.18998434e+00 -1.36890488e+00  2.55234290e+00
  -9.89569391e-01  9.08833789e-01 -9.30134393e-01  1.04770884e+00
   3.54327049e+00 -2.84059201e-01  1.36499272e+00 -2.80513656e-01
   2.69645794e+00 -4.02745034e+00 -7.75009515e-01 -1.63280442e+00
   2.00291244e+00  4.66345165e+00 -2.32433335e+00  1.08107955e+00
  -8.57033285e-02 -3.39287057e+00 -2.02288751e+00 -4.63060722e+00
  -7.04892480e-01  3.56802427e+00]]
******** n_components=148 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271685 0.00264588 0.00259079 0.0025124  0.00242961
 0.00233462 0.00227578 0.00215715 0.00214652 0.00208896 0.0020204
 0.00197381 0.0019118  0.00187727 0.00184389 0.00182903 0.00178583
 0.00174055 0.00169156 0.00167826 0.00163829 0.0016023  0.00156953
 0.00155863 0.00153429 0.00150422 0.00146964 0.00146244 0.00144059
 0.00141541 0.00140842 0.00139406 0.00137398 0.00136378 0.00135762
 0.00134286 0.00131147 0.00130776 0.0012972  0.00126373 0.00125844
 0.00124794 0.00124277 0.00122645 0.00122108 0.00120608 0.00119169
 0.00118944 0.00116828 0.00116415 0.00115609 0.00114682 0.00113491
 0.00113409 0.00112721 0.0011097  0.00110066 0.00109639 0.00108645
 0.00107875 0.00106683 0.00106324 0.0010566  0.00105383 0.00104563
 0.00104102 0.00103445 0.00102329 0.00101691 0.00101815 0.00101268
 0.00100395 0.00099704 0.00098853 0.00098686 0.00098071 0.00097813
 0.00096905 0.00096173 0.00095998 0.00095224 0.00094617 0.00093916
 0.00093397 0.00093285 0.00092343 0.00092009 0.00091592 0.00091324
 0.00090589 0.0008992  0.00089873 0.0008935  0.00088805 0.00088438
 0.00088216 0.00087752 0.00087629 0.00086635 0.00086408 0.00085937
 0.00085375 0.00085131 0.00084351 0.00084315 0.00083511 0.00083228
 0.00082587 0.00082547 0.00081797 0.00081588 0.00081135 0.00080422
 0.00080204 0.00079987 0.00079426 0.00078731 0.00078226 0.00078333
 0.00077738 0.00076816 0.00076821 0.00075807]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460654  22.63420233  19.14389614  16.97351607
  15.56368001  13.87362379  13.43690363  12.72377828  11.63902532
  10.82235197  10.37420565   9.86664406   9.67265798   8.99693923
   8.43765673   8.00106833   7.58945248   7.46718937   7.08494047
   6.63001633   6.45683114   6.32239493   6.13107654   5.92906359
   5.69725098   5.55365999   5.26416869   5.23822525   5.09776329
   4.93044903   4.81673691   4.66542677   4.58116007   4.49969611
   4.46342576   4.35800931   4.24752318   4.12796646   4.09550717
   3.9979672    3.91014837   3.83017134   3.80356511   3.74416452
   3.67080384   3.58640446   3.56884565   3.51551289   3.45407602
   3.43700599   3.40197941   3.35295833   3.32807476   3.31304421
   3.2770163    3.20042336   3.19137472   3.16559959   3.08392335
   3.07099773   3.04539588   3.03276322   2.99294149   2.97983022
   2.94323641   2.90812617   2.90263402   2.85099685   2.84090207
   2.82123802   2.79860957   2.76956564   2.76756622   2.75076248
   2.70803431   2.68597153   2.67555898   2.65130273   2.6325026
   2.60342704   2.59466596   2.57844974   2.57169057   2.55168045
   2.54043711   2.52440787   2.49715525   2.48158915   2.48463311
   2.47126598   2.44997163   2.43311065   2.4123507    2.40826269
   2.39325742   2.38696705   2.36481347   2.34693867   2.34266702
   2.32377745   2.30897805   2.29187055   2.27920404   2.27645778
   2.2534764    2.24531706   2.23514013   2.22859858   2.21067909
   2.19433894   2.19319641   2.18043608   2.16713094   2.15816997
   2.15276471   2.14143277   2.13844642   2.11418931   2.1086473
   2.09714915   2.08343622   2.07748367   2.05844646   2.05756117
   2.03794599   2.03104899   2.01539596   2.01442324   1.99611147
   1.99101102   1.97995423   1.96255896   1.95723075   1.95194973
   1.93824636   1.92129353   1.908976     1.91158137   1.89705798
   1.87457631   1.87468402   1.84994565]
Total Variance Explained by all components: 0.4576049822021624
Shape of the reduced data:
(56318, 148)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590266e+01
  -2.34470218e+01 -6.45674037e+00  1.28157026e+01  4.84912815e+01
   2.18300304e+00  2.26881719e+00  3.27898771e+01  4.25133142e+01
   2.81327710e+01  3.21981439e+01  1.59422979e+01  2.61141490e+01
   9.76991255e+00 -6.71704240e+00  6.66844012e+00  1.29681494e+00
   5.84256306e-01 -7.83346938e+00  1.21228853e+01 -7.41565921e+00
   1.77309191e+01 -4.35092874e+00  2.03564048e+01  6.98563042e+00
   1.29686959e+01  6.96190308e+00  1.56976959e+01  1.44100342e+01
   1.17960485e+01 -8.78713746e+00 -7.21225817e+00  1.73323683e+01
  -1.05578737e+01  8.59186730e+00 -6.23742589e+00 -8.40782911e+00
   1.06972936e+00 -2.71437510e+00 -7.23572668e+00  1.88739989e+00
   3.02450067e+00  5.31348841e+00 -2.75667130e+00  1.18315552e+01
   6.68536990e+00  1.33467362e+01  8.30361275e-01  1.49020822e+01
   6.62619799e+00  5.60272109e+00  3.51095420e+00  1.96174238e+00
   3.23466954e+00 -2.95183128e+00  4.28353496e+00  5.49142634e+00
  -4.11617832e+00 -3.16330308e+00  1.28463490e+00  1.71917619e+00
  -4.80106075e+00  1.02979090e+00 -7.31453745e+00 -3.72140151e+00
  -8.83719300e+00 -9.22566489e+00 -3.76634006e+00  3.04271249e+00
   6.07897705e-01  4.03036773e+00  1.48607520e+01 -8.18677560e+00
  -9.82089403e-01  4.60647918e+00  2.49196896e+00  2.75281126e+00
  -3.39066111e-01  6.67800175e+00 -4.36770820e+00 -4.13080474e-01
  -7.51928575e+00 -3.83810728e+00 -3.94873717e+00  4.61170207e-01
  -5.98961557e+00  4.35205957e+00 -1.21042530e+00  1.24171255e+00
  -1.19489490e+00 -7.72976640e+00  1.08989666e+00 -5.95050132e-01
   1.89156814e+00 -2.33987044e+00 -1.46731989e+00  2.03699925e+00
   4.85951524e-01  8.93009834e-01 -1.38589265e+00  1.21993830e+00
   9.14786581e-01 -6.48722593e+00  2.35657118e-01 -8.66390846e-01
   9.98434887e-01 -4.96248643e+00  6.81498026e+00 -6.84948398e+00
   2.19031328e-01  6.37366091e+00  1.80430282e+00 -5.62809365e+00
   1.63262228e-01  3.47467402e+00  1.15860587e+00  3.95991231e+00
  -5.56022916e+00 -1.62761708e+00 -6.21199379e-01 -1.42917074e+00
   1.46432520e+00  3.98206818e+00  1.64100787e+00  7.53965157e-02
   1.58645496e+00 -3.06138629e+00 -6.35329966e+00  5.94956136e+00
   3.83149107e+00  1.15166265e+00  9.47001882e-01 -1.79088035e-01
  -5.70777520e+00 -2.10943542e+00  3.96810937e+00 -2.89994233e+00
   3.81291856e+00  3.03842278e+00 -4.31556754e-01 -3.68970973e+00
  -2.06710099e-01  1.08946342e+00  3.13787230e-01  1.88406187e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387651e+00
   5.83568896e+00 -1.96499961e+00 -8.10543487e+00  6.02060882e+00
   3.19552406e+01  1.31808339e+01  1.59283920e+01  3.10154998e+01
   1.88028590e+01  2.58402036e+01  2.62840258e+00  8.23556061e+00
   1.47738536e+01  1.40477267e+01  2.42947076e+00 -4.03616936e+00
   1.38386476e+01  7.13954360e-01 -3.92717213e+00  8.39246773e+00
  -9.63667872e+00 -1.10449719e+01  1.78272809e+00  1.60597717e+01
  -1.53078845e+00  1.73845781e+00  1.96886931e+01  2.77718778e+00
   1.42841984e+00 -4.26783458e-02  4.05366258e+00  7.16220677e+00
  -2.97770025e+00 -8.92915961e+00 -5.21488270e+00 -7.68839391e+00
   4.95978046e+00 -1.86771412e+00  4.96044281e+00 -4.65245706e+00
   1.26691224e+00  2.64094912e+00 -1.53658427e+00  1.04235261e+01
  -9.94311217e+00  1.02064536e+01  5.93484724e+00  1.49447017e+01
   9.37164909e-01  8.88476391e+00  3.96570115e+00  1.40510782e+00
  -7.47930529e+00  4.35441673e-01  2.05453050e+00  2.42013421e-02
   3.07557902e+00 -6.01438048e+00 -2.86474270e+00  8.25345599e+00
  -2.51543811e+00 -6.04805416e+00 -5.95363937e+00 -1.83549793e+00
   1.20596313e+00  9.85627505e-01 -3.55177364e+00 -3.01403995e+00
  -1.24758821e+00  2.60079137e-01  4.21290781e+00 -6.46968355e+00
   4.53897395e+00  1.86383040e+00 -3.17459330e+00  3.49329878e+00
   1.00165518e+00  7.11275164e+00 -1.49939307e+00  1.64334749e+00
   3.20092339e+00  2.05959429e+00  5.98915964e-01 -3.17294046e+00
   2.95420537e+00 -1.28314834e+00  6.38630001e-01  6.04111577e+00
   7.55293762e-01  2.89710161e+00 -2.47839412e+00  2.26462143e+00
   4.52873988e+00  1.86614672e-01  3.72580004e+00  3.49369333e+00
   1.34922376e+00 -2.62401633e+00  6.13245419e+00  1.12364535e+00
   3.02193664e+00  5.23608604e-01  9.32608813e-01  4.36231080e-01
   4.04391942e-01 -6.35225247e+00  5.00609598e+00  4.27012928e-01
   1.30247270e+00  2.57632865e+00 -4.67600219e+00 -2.11905188e+00
   2.08703511e+00  1.11108938e+00  1.93274349e+00  2.47694415e+00
   1.35458938e+00 -1.85759995e+00  4.44637773e-01 -4.66745168e-01
   4.06913351e+00 -3.09301713e+00  3.79862622e+00 -8.85983031e-01
  -1.11754915e+00  2.42169786e+00 -2.88559009e+00  2.80527462e+00
   2.84991464e+00  4.96102663e-01  4.77445573e-01  2.17104552e+00
  -3.81716057e+00 -1.30021212e+00 -1.47122611e+00  9.35902323e-01
   1.90326302e+00 -1.47778535e-01  5.69688256e-01 -2.47655206e+00
  -1.67065031e+00  3.82650046e+00 -2.28048457e-01  2.36566168e+00]]
******** n_components=158 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271685 0.00264588 0.0025908  0.0025124  0.00242961
 0.00233462 0.00227578 0.00215716 0.00214653 0.00208898 0.00202043
 0.00197382 0.00191181 0.00187726 0.00184387 0.00182903 0.00178589
 0.00174057 0.00169159 0.0016783  0.0016383  0.00160216 0.00156951
 0.00155885 0.00153429 0.00150434 0.00146999 0.00146258 0.00144093
 0.00141553 0.00140851 0.00139407 0.00137494 0.00136374 0.00135775
 0.00134336 0.00131252 0.00130838 0.00129793 0.00126471 0.0012591
 0.00124889 0.00124253 0.00122849 0.00122224 0.00120787 0.00119384
 0.00119062 0.00116995 0.00116488 0.00115731 0.00114841 0.00113613
 0.00113085 0.00112883 0.00111079 0.00110488 0.00109559 0.00108759
 0.00108058 0.00106852 0.00106488 0.00106029 0.00105873 0.0010489
 0.001045   0.00103505 0.00102968 0.00101943 0.00101659 0.00100983
 0.00100482 0.00099745 0.00099491 0.00098324 0.00098342 0.00097812
 0.00096944 0.00096685 0.00095785 0.00095394 0.00094227 0.00093991
 0.00093827 0.000936   0.00092925 0.00092284 0.00091937 0.00091298
 0.00090835 0.00090725 0.00089895 0.00089524 0.00089239 0.00088929
 0.00088598 0.00088159 0.00088259 0.00087564 0.0008685  0.00086827
 0.00086707 0.00085935 0.00085346 0.00084809 0.00084446 0.00084205
 0.00083872 0.00083534 0.00083208 0.00082362 0.00082105 0.00081876
 0.00081625 0.00080886 0.00080368 0.00079944 0.00079519 0.0007897
 0.00078662 0.00078569 0.00077644 0.00077511 0.00077055 0.00076781
 0.00076463 0.0007544  0.00075484 0.00074984 0.00074691 0.00074365
 0.00074107 0.00073871]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460654  22.63420233  19.14389614  16.97351607
  15.56367999  13.87362386  13.43690363  12.72377828  11.6390253
  10.82235192  10.37420572   9.86664408   9.67265803   8.99693939
   8.43765707   8.00106861   7.5894531    7.46718956   7.08493998
   6.63001861   6.45683025   6.32240024   6.13108177   5.92906317
   5.69725634   5.55365515   5.2641917    5.23824559   5.09779208
   4.93050688   4.8167707    4.66544579   4.5811415    4.49965896
   4.46344024   4.3581563    4.24757262   4.12804293   4.09560914
   3.99799521   3.90979531   3.8301206    3.80410551   3.74418052
   3.67109831   3.5872725    3.56916816   3.5163557    3.45436598
   3.43722793   3.40199911   3.35529736   3.32797569   3.31335969
   3.27823194   3.20299474   3.19287077   3.16737018   3.08631511
   3.07262666   3.04770739   3.03218361   2.99791219   2.982668
   2.94759947   2.91335899   2.90550378   2.85507025   2.84270301
   2.82420803   2.80250359   2.77252889   2.75965042   2.75470937
   2.71070013   2.6962684    2.67360024   2.65408439   2.6369672
   2.60753744   2.59866191   2.58745513   2.58364139   2.55966208
   2.55013769   2.52586879   2.512758     2.48773408   2.48081268
   2.46433102   2.45210457   2.43410065   2.42792055   2.39943734
   2.39987067   2.38694719   2.36576316   2.35944146   2.33747787
   2.32792199   2.29945432   2.2936847    2.28968507   2.28414747
   2.26766589   2.25204667   2.24357558   2.22797482   2.21667032
   2.21398813   2.1937433    2.18467079   2.1777334    2.17015843
   2.1620793    2.15138264   2.15381071   2.13685093   2.11943649
   2.11887029   2.1159352    2.0970927    2.08273284   2.069608
   2.0607551    2.05488467   2.04676503   2.03849952   2.0305599
   2.00991331   2.00363631   1.99804648   1.99191414   1.97389736
   1.96125548   1.95090699   1.94053868   1.92712909   1.91961415
   1.91733368   1.89476127   1.89153479   1.88038991   1.87369885
   1.8659481    1.84098203   1.84205029   1.82985642   1.82269702
   1.81475842   1.80846574   1.80268857]
Total Variance Explained by all components: 0.46551669359952574
Shape of the reduced data:
(56318, 158)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590266e+01
  -2.34470218e+01 -6.45674028e+00  1.28157026e+01  4.84912822e+01
   2.18299591e+00  2.26882739e+00  3.27898601e+01  4.25132938e+01
   2.81328090e+01  3.21981442e+01  1.59423050e+01  2.61141167e+01
   9.76979423e+00 -6.71685973e+00  6.66834811e+00  1.29710257e+00
   5.84879900e-01 -7.83434260e+00  1.21211368e+01 -7.41498724e+00
   1.77303645e+01 -4.34884518e+00  2.03589096e+01  6.98735644e+00
   1.29630213e+01  6.95527227e+00  1.56929729e+01  1.43996570e+01
   1.17798540e+01 -8.79866696e+00 -7.19475495e+00  1.73040507e+01
  -1.05548886e+01  8.58943357e+00 -6.24998041e+00 -8.38161847e+00
   1.10175225e+00 -2.73807231e+00 -7.18014502e+00  1.90414950e+00
   2.95140637e+00  5.27426273e+00 -2.67987450e+00  1.18403014e+01
   6.95769024e+00  1.33179949e+01  8.33801807e-01  1.51558652e+01
   6.39609035e+00  5.78670080e+00  3.47887587e+00 -2.07315511e+00
   3.02084107e+00 -3.09873375e+00  3.50079517e+00  6.47645873e+00
  -3.97646943e+00 -2.73104851e+00  1.71340393e+00  1.49765845e+00
  -4.22931355e+00 -1.56976946e+00 -7.05056056e+00 -3.58304567e+00
  -1.09152516e+01 -8.76498120e+00 -1.99662226e+00  1.72168076e+00
   2.07214885e+00 -5.14283200e+00  1.42205496e+01  8.51284088e+00
  -4.86904476e-01  4.82457074e+00  3.96407206e+00 -9.57015987e-01
  -6.56494362e-01 -9.43038945e+00  2.41451292e+00  5.83604046e-01
  -7.52679897e+00 -2.20399944e+00  6.12202783e+00  4.26695145e+00
  -1.89286848e+00 -8.45210543e-01 -5.11902593e+00  3.12674681e+00
   3.22946998e+00  4.00594152e+00 -4.97808999e+00 -1.69849605e+00
  -1.53362970e+00  2.74787810e+00 -7.67092126e-01  3.37407294e+00
   2.72821849e+00 -9.84523060e-01 -1.10741729e-01 -1.06294695e+00
   2.16295409e+00  5.13316140e-01  4.44105027e+00  1.71898169e+00
  -6.28694144e-01 -3.66343466e+00 -1.10917647e+00  7.17678737e+00
   2.39041565e+00  6.45545535e-01  2.62954215e+00 -8.05480457e-01
   3.02439191e+00  9.20392216e+00  2.63251978e+00 -4.46179373e+00
   1.09809075e+00 -1.45548137e+00  3.16841930e+00 -4.81892807e+00
   2.13238538e+00  1.64746639e+00 -2.24094510e+00  2.05683908e+00
   6.24932919e-01 -7.11209122e-01 -3.63843634e+00  3.94341227e+00
  -3.10157257e+00  2.06468272e-01  5.88251772e+00  3.23296226e+00
   2.83012606e+00  4.46647936e+00 -5.10419458e+00  2.54029362e-01
   5.18790480e+00  6.35902334e-01 -4.09113572e+00  9.66858224e-01
   3.15968298e+00 -5.81009197e+00 -1.29024783e+00  3.52343011e+00
   4.66193231e-01  4.28245966e-02 -1.96579025e+00 -7.71626021e-02
   1.31810834e+00 -1.82016502e+00  1.31914721e+00  1.81382817e+00
   3.17765953e+00 -2.80746558e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387654e+00
   5.83568898e+00 -1.96499962e+00 -8.10543415e+00  6.02060860e+00
   3.19552406e+01  1.31808316e+01  1.59283709e+01  3.10155244e+01
   1.88028728e+01  2.58402603e+01  2.62850969e+00  8.23553839e+00
   1.47738718e+01  1.40476736e+01  2.42958545e+00 -4.03675325e+00
   1.38384229e+01  7.13664599e-01 -3.92739197e+00  8.39356370e+00
  -9.63623126e+00 -1.10431549e+01  1.78366763e+00  1.60592189e+01
  -1.53246083e+00  1.74068558e+00  1.96920884e+01  2.76486918e+00
   1.42703599e+00 -5.11432109e-02  4.05110757e+00  7.15796455e+00
  -2.98302691e+00 -8.95678823e+00 -5.25470276e+00 -7.66734230e+00
   4.97688383e+00 -1.86649170e+00  5.03361805e+00 -4.58530843e+00
   1.25863294e+00  2.67262711e+00 -1.46000031e+00  1.03366226e+01
  -9.95227429e+00  1.04132482e+01  5.84228715e+00  1.49576749e+01
   8.97025992e-01  8.98315921e+00  4.20048047e+00 -1.50265054e+00
  -7.46300239e+00  6.12428787e-01  2.09406087e+00  2.07325887e-01
   3.30982237e+00 -5.89504639e+00 -1.54023257e+00  8.27869202e+00
  -2.64504578e+00  5.53566762e+00 -7.05988171e+00 -2.65546327e+00
   9.18205240e-01  3.19347063e-02 -3.53606314e+00 -3.58748394e+00
  -9.18974483e-01 -3.16776178e-01  4.40318510e+00  6.07903318e+00
   5.00711157e+00 -1.86450408e+00  5.07600891e+00 -1.65801550e-01
   1.67101762e+00 -6.63061996e+00 -1.19857260e+00 -2.00499166e+00
   3.63109770e+00  2.89302098e+00 -1.39862909e+00 -3.00625260e+00
  -2.90702995e+00 -1.46801170e-01  2.32500283e+00  4.92700133e+00
  -1.17608556e+00 -1.16538023e+00  4.59755679e+00 -2.33836202e+00
   3.64049732e+00 -2.48305951e+00  2.02693664e+00 -1.47494150e+00
   3.30840356e+00  4.95168979e+00 -3.94657521e+00 -3.77907161e+00
   9.73208196e-01  4.60866636e+00  5.70798622e+00 -2.46850890e-01
   5.83508866e-01 -3.87690908e+00 -1.42124990e+00  3.91524808e+00
   1.19780900e+00  1.22820677e+00  7.13767126e-01  1.52048948e+00
   1.54287057e+00  5.06026023e+00 -3.76372369e-01  4.03908883e+00
  -1.28239727e-01 -3.39694757e-01 -2.77110082e+00  1.24315096e+00
   4.00677496e+00 -5.17504145e-01  2.94777364e+00  1.24349704e-01
   1.55437348e+00  1.31621792e+00  3.11381635e+00 -8.75503801e-01
  -4.42230909e-01 -2.43486389e+00  5.97130283e+00  2.79388172e-01
   8.67157254e-01  2.09193378e+00  1.12591224e+00  2.30864971e+00
   5.13130754e-01  2.89295412e+00 -1.33760314e+00  2.04612342e+00
  -1.99991602e+00 -1.20933493e+00 -4.91094382e-03 -3.10085946e-01
   6.54728839e-01  2.81555523e+00 -8.63587708e-01 -1.79929371e+00
   1.48356744e+00  9.24651110e-01 -2.13109913e+00  4.51462847e+00
  -1.39384632e+00 -2.51654428e+00]]
******** n_components=168 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271685 0.00264588 0.0025908  0.0025124  0.00242962
 0.00233462 0.00227578 0.00215716 0.00214654 0.00208898 0.00202042
 0.00197383 0.00191181 0.00187729 0.0018439  0.00182904 0.0017859
 0.00174059 0.0016916  0.00167831 0.00163836 0.00160237 0.00156953
 0.00155887 0.00153447 0.00150441 0.00147002 0.00146275 0.00144095
 0.00141554 0.00140865 0.00139405 0.00137512 0.00136417 0.00135793
 0.00134358 0.00131265 0.00130837 0.00129786 0.00126465 0.0012585
 0.00124963 0.0012432  0.00122884 0.00122249 0.00120871 0.00119447
 0.00119136 0.00117019 0.00116626 0.00115722 0.0011499  0.00113798
 0.00113485 0.00112998 0.00111074 0.00110585 0.00109559 0.00108785
 0.00108163 0.00106922 0.00106545 0.00106084 0.00105962 0.00105142
 0.00104517 0.00103989 0.00103195 0.00102396 0.00101748 0.00101188
 0.00100922 0.00099876 0.00099732 0.00098895 0.00098647 0.00098169
 0.00097305 0.00097014 0.00096055 0.0009572  0.00094762 0.00094379
 0.00094045 0.0009373  0.0009322  0.00092903 0.00092839 0.00091802
 0.0009168  0.00090885 0.0009037  0.00090123 0.00089836 0.00089315
 0.00088904 0.00088553 0.00088294 0.00087903 0.00087691 0.00087292
 0.00086863 0.00086282 0.0008597  0.0008556  0.00085011 0.00084648
 0.00084346 0.00084029 0.00083757 0.00083209 0.00083005 0.00082329
 0.00081798 0.00081539 0.00081381 0.00080769 0.00080627 0.00080114
 0.00079787 0.0007943  0.00078969 0.0007846  0.00078152 0.00078114
 0.00077614 0.00077208 0.0007678  0.00076781 0.00076062 0.00075912
 0.0007521  0.00074736 0.00074669 0.00074159 0.00073973 0.00073382
 0.00073195 0.00072649 0.00071859 0.00071566 0.00071008 0.0007025 ]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460654  22.63420233  19.14389615  16.97351607
  15.56368002  13.87362382  13.43690363  12.72377825  11.63902536
  10.8223519   10.37420573   9.86664454   9.67265805   8.99693963
   8.43765726   8.00106883   7.58945299   7.46718894   7.0849392
   6.63001766   6.45683143   6.32240124   6.13108254   5.92906678
   5.69725264   5.55365856   5.26419532   5.23825853   5.09779466
   4.93050263   4.81680583   4.66545172   4.58120876   4.49973537
   4.4634662    4.35817603   4.2476042    4.12805295   4.09563471
   3.99813519   3.91032449   3.8301785    3.80416029   3.74460979
   3.67126437   3.58733458   3.56959985   3.51640556   3.45438719
   3.43756828   3.40194273   3.35574239   3.32903049   3.31380818
   3.27878712   3.20330855   3.19285226   3.16719808   3.08615321
   3.07116474   3.04951639   3.03380912   2.99878743   2.98327526
   2.94965612   2.91489505   2.9073239    2.85565536   2.84606679
   2.82399441   2.80614573   2.77705811   2.76940614   2.75753528
   2.71056865   2.69862847   2.67359884   2.65470587   2.6395326
   2.60924735   2.60005914   2.58879679   2.58582988   2.56581165
   2.55054854   2.53768445   2.51830938   2.49880812   2.48298675
   2.46931725   2.46283005   2.43731035   2.43380251   2.4133718
   2.40731748   2.39564693   2.37456268   2.36746008   2.34407177
   2.33587671   2.31250073   2.30316423   2.29500956   2.28732246
   2.27488828   2.26712904   2.26558398   2.24026089   2.2372965
   2.21790381   2.20531794   2.19930633   2.19229854   2.17959299
   2.16954389   2.16097377   2.15467464   2.1451247    2.13994932
   2.1302084    2.11974987   2.10556746   2.09794647   2.08795482
   2.07455316   2.06569159   2.05831111   2.05058896   2.04394041
   2.03057725   2.02560438   2.00908785   1.99615345   1.98981393
   1.985957     1.97103102   1.96755826   1.95504129   1.94706733
   1.93834948   1.92710528   1.91468345   1.90717849   1.90624725
   1.89403252   1.88413814   1.87368877   1.8737153    1.8561651
   1.85250714   1.83536229   1.8238152    1.82215993   1.80972701
   1.8051833    1.79077091   1.78619898   1.77286746   1.75359955
   1.74645837   1.7328366    1.7143234 ]
Total Variance Explained by all components: 0.47323643557300404
Shape of the reduced data:
(56318, 168)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590266e+01
  -2.34470218e+01 -6.45674031e+00  1.28157024e+01  4.84912814e+01
   2.18300035e+00  2.26881568e+00  3.27898774e+01  4.25133171e+01
   2.81328334e+01  3.21981215e+01  1.59423919e+01  2.61139113e+01
   9.76991568e+00 -6.71713208e+00  6.66852910e+00  1.29680457e+00
   5.85248068e-01 -7.83456712e+00  1.21212690e+01 -7.41438500e+00
   1.77332748e+01 -4.35015841e+00  2.03572127e+01  6.99006504e+00
   1.29644929e+01  6.95262244e+00  1.56921566e+01  1.44050895e+01
   1.17774239e+01 -8.78874238e+00 -7.21246277e+00  1.73198109e+01
  -1.05545975e+01 -8.57888237e+00 -6.22099774e+00 -8.38564371e+00
   1.10216267e+00 -2.74514131e+00 -7.14413287e+00  1.89712010e+00
   3.03918039e+00  5.28003339e+00 -2.62830964e+00  1.18656522e+01
   6.81631432e+00  1.34075652e+01  8.42959587e-01  1.49210961e+01
   6.72100493e+00  5.54015328e+00  3.64449822e+00  2.11765193e+00
   2.96801938e+00 -2.94803631e+00  4.68967120e+00  5.67310346e+00
  -4.07800135e+00 -2.63051113e+00  1.66480234e+00  1.73700932e+00
  -4.41948011e+00 -2.32734500e+00 -6.13949422e+00 -3.73523373e+00
  -1.15999201e+01 -7.01262050e+00 -3.99850770e+00  2.07213599e+00
   3.14813282e+00 -5.95524590e+00  1.44170450e+01 -7.33361117e+00
  -2.80215796e-01  4.46756669e+00  3.20046661e+00  3.91369726e+00
  -4.58303453e-01 -7.37502316e+00 -2.80344520e+00 -1.66510217e+00
   6.34415273e+00 -1.76737556e-02  7.87351914e+00  4.52847609e+00
  -5.64690881e-01  1.97196949e+00 -5.65308815e+00 -2.86830505e+00
   2.72618859e-01 -2.05378359e-01 -5.56671201e+00  2.99938172e+00
   3.02371857e+00  2.58873384e+00  1.37658001e+00 -1.66861873e+00
  -2.97162967e-02  3.15362558e+00  6.76658174e-01  2.02062419e+00
   4.85763166e+00 -8.85210629e-01 -1.60757681e-01 -2.90101439e+00
   6.14269701e+00 -3.14730783e+00  1.02965441e+00 -1.91834586e+00
  -3.17766110e+00  4.92758861e+00 -1.08212641e+00  8.49865681e+00
  -8.10740917e-01  7.89793120e+00 -1.77157442e+00 -6.47396916e-01
  -1.23867166e+00 -3.87394219e-01  4.03872831e+00  5.63559412e-01
  -2.62694945e+00 -1.09992254e+00 -1.66024345e+00 -3.01089784e-01
   5.22975851e+00  1.90674634e+00 -5.54199981e+00  4.90805180e+00
  -6.78627094e-01  1.40957064e+00  6.98311686e-01  5.14927765e+00
  -2.28575421e+00 -3.53672516e+00  3.36727376e+00  1.11790536e+00
   3.82061886e+00 -9.27336676e-02 -4.96049887e-02  4.58163571e+00
   1.03943508e+00  2.64665260e+00 -1.17668456e+00  9.75755726e-01
   4.46669236e-01  5.12158770e+00  1.73782202e+00  1.26961580e+00
   5.49285541e+00  1.18339027e+00  1.26344232e+00 -2.06674238e+00
   7.37600120e-01  1.99009532e+00  8.96243566e+00 -5.85710484e-01
  -5.84685733e-02 -4.35887223e+00  3.59732821e+00  9.46606812e-01
  -5.17589537e+00  2.46509291e+00  3.52817434e+00  1.62573887e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387650e+00
   5.83568897e+00 -1.96499954e+00 -8.10543452e+00  6.02060866e+00
   3.19552386e+01  1.31808243e+01  1.59283834e+01  3.10155026e+01
   1.88029127e+01  2.58402518e+01  2.62850002e+00  8.23545861e+00
   1.47738161e+01  1.40476858e+01  2.42933205e+00 -4.03646996e+00
   1.38384933e+01  7.13874474e-01 -3.92804261e+00  8.39280357e+00
  -9.63666834e+00 -1.10409326e+01  1.78188304e+00  1.60605032e+01
  -1.52919367e+00  1.73874823e+00  1.96933455e+01  2.77714040e+00
   1.41819935e+00 -4.78984861e-02  4.04204607e+00  7.15880220e+00
  -2.97603574e+00  8.92545534e+00 -5.20503233e+00 -7.67342323e+00
   5.00380628e+00 -1.88218167e+00  5.00335673e+00 -4.62107851e+00
   1.30864548e+00  2.65535877e+00 -1.60200032e+00  1.04003485e+01
  -9.86490841e+00  1.03584478e+01  5.92337901e+00  1.49904867e+01
   1.20148763e+00  8.65037558e+00  4.31801168e+00  1.19149528e+00
  -7.44420653e+00  5.85868364e-01  2.23710484e+00 -2.18292312e-02
   3.20208226e+00 -6.11246539e+00 -1.84854089e+00  8.28114987e+00
  -3.09985196e+00  4.92653011e+00 -6.58173898e+00 -2.76525386e+00
   8.08288754e-01  4.24883860e-01 -4.15765497e+00 -3.06995618e+00
  -8.43132862e-01 -1.13524535e+00  5.31398487e+00 -5.79898420e+00
   5.04171298e+00 -1.96923037e+00  3.59109258e+00  4.13773927e+00
   1.84423191e+00 -5.47525940e+00 -2.98655248e-01  9.23499311e-01
  -3.81570146e+00  3.20742555e-01 -4.22229914e+00  7.67854557e-01
  -3.62677730e+00  3.57362884e-01  1.88815801e+00 -5.09993825e+00
  -1.31047317e+00  4.32560525e-01  5.39549378e+00  6.99519663e-01
   2.09982507e+00 -2.02914718e+00  6.82731423e-01  2.20981727e+00
   7.11562327e+00 -3.26933552e+00 -1.06162570e+00 -4.10341132e+00
   6.20432036e+00  4.40754633e-01 -2.83324530e+00 -2.00931696e+00
   5.81594534e+00 -5.20869067e-01  2.25942090e+00 -3.83319379e+00
  -5.53588959e+00 -3.16623009e-01 -1.68789390e+00 -1.37348067e+00
   1.11774668e+00 -1.31494329e+00  5.74541988e-01  4.11348076e-01
  -1.65275131e+00  5.25039150e+00  3.74721222e+00  2.03287428e+00
  -2.93048084e+00  1.49128102e-01 -9.45836644e-01 -2.99585565e+00
   5.32498736e+00 -1.57531835e+00 -3.11580143e-01  3.45921344e+00
  -4.84604596e+00 -1.58035136e+00  2.12196769e+00  2.17027540e+00
   1.04609651e+00  1.11771942e+00  2.83769645e+00 -2.75043668e+00
   1.26367835e+00  1.19803501e+00  6.34458129e-01 -1.32603796e+00
  -3.11676171e+00  2.14255136e+00  1.00005183e+00 -3.02148429e+00
  -1.40977710e+00  5.54866029e-01  1.01861637e+00 -1.90890998e+00
   1.32015765e-01 -2.53117743e-01  3.90543191e+00  2.32306797e+00
   2.40820827e+00  9.19427314e-01  5.50536841e+00  4.86856667e-01
   3.68662395e+00  2.76912123e+00 -2.04189231e+00  9.97291290e-01
  -2.15815085e-01  2.81639760e+00  9.15269430e-01 -1.39053588e+00]]
******** n_components=178 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271685 0.00264588 0.0025908  0.0025124  0.00242962
 0.00233462 0.00227578 0.00215717 0.00214654 0.00208898 0.00202043
 0.00197383 0.00191183 0.0018773  0.00184391 0.00182904 0.0017859
 0.0017406  0.00169165 0.00167836 0.00163834 0.00160241 0.00156955
 0.00155891 0.00153451 0.00150447 0.0014701  0.00146271 0.00144114
 0.00141574 0.00140889 0.00139431 0.00137517 0.00136415 0.00135816
 0.00134348 0.00131302 0.00130876 0.00129819 0.00126546 0.00125939
 0.00125028 0.0012434  0.00122935 0.00122287 0.00120967 0.0011949
 0.00119145 0.00117072 0.001167   0.0011577  0.00114946 0.00113876
 0.00113628 0.00112917 0.00111108 0.00110622 0.00109779 0.00108914
 0.00108261 0.00107094 0.00106793 0.00106339 0.00106126 0.00105169
 0.00104643 0.0010408  0.00103404 0.00102338 0.0010204  0.00101376
 0.00100582 0.00100197 0.00099811 0.00099215 0.00098813 0.00098547
 0.00097539 0.00097307 0.00096597 0.00096267 0.00095121 0.00094643
 0.00094211 0.00094087 0.00093931 0.00093547 0.00093199 0.00092144
 0.00091302 0.0009111  0.00090554 0.00090355 0.0008993  0.0008972
 0.00089438 0.00088961 0.0008871  0.00088173 0.00088131 0.00087829
 0.00087239 0.00087168 0.00086502 0.00086433 0.00085888 0.00084961
 0.00084905 0.00084865 0.00084358 0.00083872 0.00083586 0.00083459
 0.0008278  0.00082486 0.00082201 0.00081709 0.000815   0.00080884
 0.00080484 0.00079928 0.00079638 0.000796   0.00079245 0.00079087
 0.00078689 0.00077847 0.00077752 0.00077381 0.00076976 0.00076746
 0.00076437 0.00076224 0.00075206 0.00074972 0.00074797 0.0007457
 0.00074249 0.0007354  0.00073307 0.0007291  0.00072383 0.00072137
 0.00071777 0.00071247 0.00071144 0.00070749 0.0007061  0.00070132
 0.00069788 0.00069331 0.00068581 0.00068438]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460654  22.63420233  19.14389614  16.97351606
  15.56367999  13.87362382  13.43690363  12.72377823  11.63902532
  10.8223519   10.37420571   9.86664461   9.67265817   8.99693944
   8.43765707   8.00106852   7.58945288   7.46718884   7.08493975
   6.63001982   6.45683225   6.32239806   6.13108334   5.92906892
   5.69725613   5.55366082   5.26420076   5.23826218   5.09779937
   4.93051541   4.81678845   4.6654856    4.5812231    4.49975837
   4.46347351   4.35818926   4.24764828   4.12818481   4.09576215
   3.99808179   3.91041094   3.83021147   3.80424499   3.74471481
   3.67140123   3.58753115   3.56950226   3.51684778   3.45487104
   3.43816766   3.40256956   3.35587502   3.3289706    3.31436722
   3.27854541   3.20420018   3.19380562   3.16802061   3.08813678
   3.07332748   3.05108954   3.03431179   3.00002599   2.98420815
   2.95199529   2.91596109   2.90753347   2.85695113   2.84786127
   2.82518259   2.80506024   2.77895105   2.77289608   2.75554915
   2.71141312   2.69954989   2.67895892   2.6578651    2.64192222
   2.61345521   2.60609843   2.59502119   2.589818     2.5664728
   2.55363655   2.53989174   2.52339773   2.49737662   2.49010906
   2.47390134   2.45453047   2.44513477   2.43572019   2.42117517
   2.41135788   2.40486388   2.3802765    2.37460988   2.35728667
   2.34923144   2.32127225   2.30961047   2.29905581   2.29604429
   2.29222104   2.2828661    2.27435177   2.24861925   2.22807965
   2.22339476   2.20980691   2.2049581    2.19458225   2.18945762
   2.18259262   2.1709492    2.16481361   2.1517137    2.15067804
   2.14332923   2.12891838   2.1271852    2.11093039   2.10924408
   2.09595211   2.07332078   2.0719669    2.07099497   2.05860202
   2.04674293   2.03976456   2.03666923   2.02010204   2.01294224
   2.00597994   1.99396745   1.98885896   1.97384597   1.96406401
   1.95050155   1.94342839   1.94250105   1.93384423   1.92997893
   1.92027931   1.89973168   1.89740539   1.88835295   1.87847213
   1.87284461   1.86531681   1.8601164    1.83527084   1.82956057
   1.82530611   1.8197568    1.81192373   1.79461338   1.78893323
   1.77924821   1.76637792   1.76038513   1.75158531   1.73867384
   1.73614087   1.72651047   1.72312243   1.71145151   1.70304775
   1.69190592   1.67361003   1.67011953]
Total Variance Explained by all components: 0.4807552975592656
Shape of the reduced data:
(56318, 178)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590266e+01
  -2.34470218e+01 -6.45674030e+00  1.28157029e+01  4.84912830e+01
   2.18299697e+00  2.26882083e+00  3.27898849e+01  4.25133091e+01
   2.81328312e+01  3.21981466e+01  1.59423581e+01  2.61139529e+01
   9.76985420e+00 -6.71727626e+00  6.66849647e+00  1.29677271e+00
   5.85295966e-01 -7.83435835e+00  1.21213665e+01 -7.41389341e+00
   1.77335571e+01 -4.35159952e+00  2.03580662e+01  6.99088168e+00
   1.29650472e+01  6.95645453e+00  1.56957261e+01  1.44081964e+01
   1.17746407e+01 -8.79339780e+00 -7.21103110e+00  1.73335236e+01
  -1.05395787e+01  8.59173486e+00 -6.24557163e+00 -8.39837884e+00
   1.11053393e+00 -2.74176700e+00 -7.15861275e+00  1.93681670e+00
   3.02852368e+00  5.34950637e+00 -2.66109886e+00  1.18322673e+01
   6.86254816e+00  1.34178572e+01  8.36747033e-01  1.49025907e+01
   6.76411060e+00  5.62704386e+00  3.68263421e+00 -1.92380068e+00
   3.10573613e+00 -3.00924578e+00  4.40191447e+00  5.82441364e+00
  -4.14278549e+00 -2.92107978e+00  1.60808499e+00  1.79763983e+00
  -4.26198107e+00 -2.08837759e+00 -6.56573256e+00 -3.41616881e+00
  -1.10107833e+01 -7.84151389e+00 -3.55760395e+00  2.00181178e+00
   2.70164215e+00 -7.83581603e+00  1.27232389e+01  7.67452911e+00
  -6.39842795e-01  4.51896386e+00  3.78142912e+00  3.11429675e+00
  -1.15737284e+00  8.57861994e+00 -3.01190096e+00  9.99106111e-01
  -7.74327745e+00 -1.78664328e+00 -6.76462999e+00  5.47476938e+00
  -4.44429130e-01  1.92823432e+00 -4.49223703e+00  1.39092168e+00
   5.49433005e-01 -2.40151506e-01 -5.30120098e+00  2.92249917e+00
  -7.69047771e-01 -8.52142003e-01 -1.56746718e+00  1.98711616e+00
  -2.61909589e+00 -2.22400140e+00 -1.33857808e+00 -6.22542678e-03
  -3.90291527e+00  7.47664497e-01  2.93527111e+00  8.79191551e-01
  -4.08724175e-01 -3.30788580e+00 -5.85228194e+00 -5.42031194e+00
   2.73561337e+00  6.56152183e+00  4.32410065e+00  4.94095527e+00
   1.07896327e+00 -2.64253192e+00  3.99056889e+00  7.21497606e+00
  -5.44200966e+00  5.71796081e+00 -1.71746569e-01  5.29965752e-01
   2.87063719e+00 -8.07046723e-02  5.99703772e-01  1.09754789e-01
   1.44179480e+00 -9.80225525e-01  2.86701748e-01 -2.96768006e+00
   8.13238679e+00 -6.31949763e+00 -2.84370398e+00  1.26609595e+00
   1.20646170e+00  1.26331120e+00  2.89286832e+00 -9.82652059e-01
  -6.70208306e+00 -4.72684840e-01 -1.31973028e+00 -1.20144863e+00
   2.92667717e+00  9.36744117e-01 -2.12606479e+00 -2.40996572e+00
   1.24667783e+00 -1.20576403e+00 -9.55256368e-01  7.13186045e+00
  -9.72205409e-01 -2.06621437e+00 -2.18737290e+00 -3.49451490e+00
   7.29649055e-01 -7.73059023e-01  5.78932030e-01  4.15239788e+00
  -1.33720352e+00 -3.03076129e+00 -1.71950081e+00  1.75995747e+00
  -2.95296000e-01 -2.77795302e+00  1.26109901e+00  2.07675406e-01
   1.57580633e+00 -1.75349389e+00 -4.62736580e+00  5.73661340e+00
   2.96268063e+00 -3.27732831e+00  2.56250034e+00  2.14561621e+00
  -2.61151174e+00 -6.08572270e-01]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387651e+00
   5.83568898e+00 -1.96499959e+00 -8.10543453e+00  6.02061011e+00
   3.19552371e+01  1.31808326e+01  1.59283824e+01  3.10155135e+01
   1.88029050e+01  2.58402254e+01  2.62846097e+00  8.23548510e+00
   1.47737847e+01  1.40476017e+01  2.42923232e+00 -4.03656335e+00
   1.38386721e+01  7.13902306e-01 -3.92755647e+00  8.39301353e+00
  -9.63542381e+00 -1.10439850e+01  1.78389262e+00  1.60612221e+01
  -1.53324406e+00  1.73737575e+00  1.96921938e+01  2.77483704e+00
   1.41917055e+00 -4.83603952e-02  4.05003175e+00  7.17215553e+00
  -2.97693900e+00 -8.94181056e+00 -5.19293265e+00 -7.68763694e+00
   4.97857998e+00 -1.85548765e+00  5.00500470e+00 -4.61831158e+00
   1.31903466e+00  2.68646670e+00 -1.57456394e+00  1.04488514e+01
  -9.90772592e+00  1.03557404e+01  5.87850645e+00  1.49123623e+01
   1.19007784e+00  8.94403741e+00  4.19374031e+00 -1.47281323e+00
  -7.49359214e+00  6.41472504e-01  2.18277470e+00  1.57454629e-01
   3.51168924e+00 -6.06233317e+00 -2.00189914e+00  8.28838860e+00
  -2.71355688e+00  5.21837457e+00 -6.47541958e+00 -2.27247955e+00
   1.01326696e+00  3.95110125e-01 -4.30874618e+00 -3.16478514e+00
  -8.38894527e-01 -1.41765521e+00  4.19366447e+00  6.15028383e+00
   4.63309103e+00 -2.13841451e+00  2.72422151e+00  4.54916315e+00
   5.63565351e-01  6.18442558e+00  7.27784208e-01 -3.14383654e-01
   5.08556067e+00  1.12868479e+00  2.39199838e+00  5.58534351e-01
   5.11516993e+00  1.02494830e+00  1.55508295e+00  5.52604693e+00
   9.23500908e-02  2.36221386e-01  5.70621636e+00 -2.00692296e-01
  -2.98040429e+00  1.16721049e+00 -1.56071600e+00  5.74771709e+00
   1.41091275e+00  7.16724826e-01  5.28027630e+00 -3.41018212e+00
  -3.27346390e+00 -1.97805895e+00  2.44366813e+00  1.97679540e+00
   1.80385749e+00 -1.25128898e+00 -3.48138792e+00 -6.25692735e+00
   2.83194490e+00  2.51975423e+00 -2.41690303e+00  3.83541798e-01
  -4.01842026e+00  1.76388662e+00  3.60222746e+00 -7.34871438e-01
   2.71583740e+00 -8.86080028e-01 -7.94560697e-01  8.79735426e-01
   2.40717961e+00 -1.26242305e+00 -3.57513629e+00 -5.55069601e+00
  -1.97419920e+00  2.67322843e+00 -8.27881930e-02  2.30332546e-01
   1.80518895e+00 -2.10905117e+00  1.85536849e-01  2.45663609e+00
  -1.22144710e+00  4.64751538e-01  2.03729594e+00 -2.17950011e+00
  -4.77308680e+00 -2.25266721e-01  3.73902531e+00  2.92226998e+00
   3.05456387e+00 -4.48394919e+00 -2.99723075e+00 -2.47498440e+00
  -6.02011337e-01  8.62787221e-01  9.71411697e-02  4.39636718e+00
  -2.11523748e+00 -9.99898153e-01 -1.91859100e+00 -3.44071689e+00
   9.56455624e-01 -1.88243675e+00 -2.76329493e+00  2.84020899e+00
   8.00228439e-01 -4.25339322e+00 -1.46393735e+00 -1.82632755e-02
  -3.12378052e-01 -1.00963842e+00 -2.50109015e+00 -1.21721078e+00
  -2.98044371e+00  1.94372093e+00 -2.86457335e+00 -2.29917997e+00
   4.59311524e+00  1.55324103e+00  8.76495085e-01  1.90269450e+00
  -6.44956177e-01  4.49386935e+00]]
******** n_components=188 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271685 0.00264588 0.0025908  0.0025124  0.00242962
 0.00233463 0.00227578 0.00215717 0.00214654 0.00208898 0.00202043
 0.00197383 0.00191183 0.0018773  0.00184392 0.00182905 0.00178592
 0.0017406  0.00169164 0.00167837 0.00163836 0.00160241 0.00156963
 0.0015589  0.0015345  0.00150448 0.00147017 0.00146273 0.00144113
 0.00141578 0.00140899 0.00139436 0.00137523 0.00136433 0.00135836
 0.0013436  0.00131308 0.00130872 0.00129848 0.00126549 0.00125965
 0.00125008 0.00124364 0.00122906 0.00122291 0.00120923 0.00119542
 0.00119189 0.00117106 0.00116751 0.00115855 0.00115057 0.00113876
 0.0011363  0.00113117 0.00111109 0.00110695 0.00109921 0.00109007
 0.00108305 0.00107215 0.0010683  0.00106346 0.00106181 0.00105347
 0.0010472  0.00104091 0.00103497 0.00102463 0.00102081 0.00101541
 0.00100915 0.00100275 0.00100104 0.00099193 0.00098976 0.00098832
 0.00097598 0.000973   0.00096775 0.00096378 0.00095353 0.00095029
 0.0009446  0.00094247 0.0009389  0.00093607 0.00093347 0.00092227
 0.00091895 0.00091571 0.00090746 0.00090593 0.00090279 0.00089894
 0.00089609 0.00089434 0.00089089 0.00088827 0.00088398 0.00088178
 0.00087955 0.0008736  0.00086752 0.00086679 0.00085868 0.00085618
 0.00085182 0.00084955 0.00084268 0.00084268 0.00083869 0.00083281
 0.00083114 0.00082908 0.00082421 0.00082105 0.00081967 0.00081213
 0.00081019 0.00080573 0.00080361 0.00079654 0.00079485 0.00079345
 0.00078826 0.00078629 0.00078224 0.00077643 0.00077375 0.0007697
 0.00076974 0.00076613 0.00076367 0.00076301 0.00075729 0.00075441
 0.0007492  0.00074703 0.00074242 0.00074222 0.0007392  0.00073424
 0.00073199 0.00072851 0.00072234 0.00071938 0.00071296 0.00070951
 0.00070862 0.00070571 0.0007013  0.00069692 0.00069384 0.00069165
 0.0006902  0.00068479 0.00068194 0.00067637 0.0006731  0.00067142
 0.0006692  0.00066773]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460654  22.63420233  19.14389615  16.97351607
  15.56368001  13.87362376  13.43690363  12.72377825  11.63902525
  10.82235195  10.37420573   9.86664449   9.67265811   8.99693939
   8.43765705   8.00106851   7.58945303   7.46718932   7.08494057
   6.63001863   6.45683249   6.32240039   6.13108303   5.92906949
   5.6972585    5.55366436   5.26420195   5.23826193   5.09780159
   4.9305184    4.81679755   4.66548517   4.58123938   4.49976269
   4.46348349   4.35822896   4.24764646   4.12816117   4.09578567
   3.99812894   3.91041361   3.83040406   3.80422222   3.74467738
   3.67142819   3.5876998    3.56955072   3.51682181   3.45497778
   3.43839394   3.40270706   3.35600901   3.32941851   3.31483993
   3.27883194   3.20434956   3.19370768   3.16872114   3.08820436
   3.07395634   3.05061684   3.03488522   2.99931566   2.98429618
   2.95091428   2.91722704   2.90860451   2.85778111   2.84909869
   2.82723665   2.80776889   2.77895539   2.77293639   2.76041949
   2.71141901   2.70133342   2.68244764   2.66013096   2.64299873
   2.61640035   2.60700516   2.59520336   2.59115846   2.570804
   2.55552044   2.54017567   2.52566454   2.50043368   2.49110704
   2.47792967   2.46266956   2.44704122   2.44285782   2.42063107
   2.41534519   2.41181614   2.38172182   2.37444052   2.36163798
   2.35195138   2.32693405   2.3190194    2.30514747   2.29994837
   2.29123243   2.28431998   2.27798751   2.25063713   2.24255034
   2.23464616   2.21450856   2.21077993   2.2031182    2.19371771
   2.18676244   2.18247676   2.17405913   2.16768196   2.15719648
   2.15182915   2.14640038   2.13186587   2.11703587   2.11525989
   2.09546764   2.08936694   2.07873184   2.07318507   2.05640877
   2.05642849   2.04668794   2.03232531   2.02824775   2.02323802
   2.0113378    2.00364449   2.0002602    1.98185478   1.97713112
   1.9662459    1.96107324   1.94380912   1.93969036   1.9362749
   1.92361983   1.91881653   1.90892406   1.89474064   1.88820478
   1.87832529   1.87842695   1.86960324   1.86359976   1.86199426
   1.84803967   1.84102087   1.82829021   1.82299481   1.81174537
   1.81126928   1.80389075   1.79179486   1.78628989   1.77779971
   1.76275907   1.75551731   1.73985728   1.73143698   1.72927306
   1.72215649   1.71140869   1.70071111   1.69320905   1.68784744
   1.68431866   1.67111528   1.66416706   1.65056411   1.64258492
   1.63849762   1.63306723   1.62947105]
Total Variance Explained by all components: 0.48798305346769244
Shape of the reduced data:
(56318, 188)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590266e+01
  -2.34470218e+01 -6.45674026e+00  1.28157025e+01  4.84912827e+01
   2.18299731e+00  2.26881728e+00  3.27898818e+01  4.25133083e+01
   2.81328277e+01  3.21981402e+01  1.59423799e+01  2.61139339e+01
   9.76995408e+00 -6.71708840e+00  6.66858948e+00  1.29681255e+00
   5.85277485e-01 -7.83504931e+00  1.21205395e+01 -7.41431832e+00
   1.77301926e+01 -4.35138155e+00  2.03581621e+01  6.99028594e+00
   1.29669635e+01  6.95469663e+00  1.56956525e+01  1.44027222e+01
   1.17796211e+01 -8.79044422e+00 -7.21229262e+00  1.73263360e+01
  -1.05525176e+01  8.57240843e+00 -6.22157474e+00 -8.40182503e+00
   1.10069462e+00 -2.72230897e+00 -7.17087967e+00  1.91132424e+00
   3.01190769e+00  5.29190461e+00 -2.69247138e+00  1.18553384e+01
   6.84586599e+00  1.33998684e+01  8.24361265e-01  1.50689870e+01
   6.48062101e+00  5.75788469e+00  3.62324160e+00 -1.96688553e+00
   3.04406314e+00 -3.00834851e+00  4.18541057e+00  5.93516948e+00
  -4.11905897e+00 -2.68172025e+00  1.70396874e+00  1.85531836e+00
  -4.24597632e+00 -1.94145670e+00 -6.82945251e+00 -3.81875984e+00
  -1.08607670e+01 -7.87689570e+00 -3.35156051e+00  1.95776352e+00
   2.80509010e+00 -4.95498370e+00  1.43321325e+01  7.84936676e+00
  -9.36635156e-01  4.21344570e+00  4.54872666e+00  3.01653626e+00
   6.57287433e-01 -7.75655002e+00 -2.98955189e+00 -1.42409035e-01
  -8.89137123e+00 -1.79749620e+00  6.88282421e+00  3.69535362e+00
  -4.50459435e-02  8.47556453e-01 -4.55645551e+00 -9.14983020e-01
   1.40410794e+00  8.43633957e-01 -5.27415972e+00  2.04320350e+00
  -1.35185039e+00 -2.69406857e+00 -1.80119649e+00  1.94801444e+00
   3.52863132e+00  2.78003550e+00 -7.99854923e-01  4.85059778e-01
   4.05469612e+00  1.55974923e+00 -4.27212714e+00  2.26338356e+00
  -1.01065191e+00 -6.89002478e+00  5.03986543e-01 -1.04412332e+00
   6.96015600e+00 -5.81557083e-01  6.61319916e+00 -1.64044130e+00
   1.74518254e+00  8.72412171e+00  4.41820239e+00  1.75818809e+00
   2.97617751e+00  5.02377503e-02 -1.00125576e+00  1.75757311e-02
   1.65916562e-01 -8.98132246e-01 -6.98070464e-01 -2.81040531e-01
  -3.25674327e+00  4.94641689e+00 -3.83722092e+00  8.04383255e-01
   7.75186892e+00 -2.74366694e+00  1.87469914e+00 -5.10564502e+00
   5.77292227e+00  3.15572483e+00 -4.08006393e-01  4.57189529e+00
   4.89145986e-01 -7.11559470e-01  1.51886988e+00  1.40906816e+00
  -4.00160608e+00  3.85771381e+00 -4.88639052e+00  9.60873276e-01
   3.97479266e+00  2.28193243e+00 -1.29241014e+00  1.14359243e+00
   1.03416595e+00  1.38274460e+00  2.63533271e+00 -3.00754668e+00
  -4.13292011e+00 -5.66336072e+00 -8.39065199e-01 -1.56182892e+00
   2.90286032e+00  1.19622480e+00 -1.15234104e+00 -2.44937557e+00
  -1.59904386e+00  1.04186902e+00 -3.97340001e+00 -1.87423974e+00
  -1.89227449e+00 -3.28510926e+00  2.87474113e+00  1.23509838e+00
  -3.49187102e-01 -1.91449533e+00  1.29742830e-01 -2.46654255e+00
   6.43030967e-01 -2.06323044e+00 -1.71924683e+00  2.83969564e-01
  -2.23103984e+00  5.58199173e+00  1.64155105e+00 -8.16787804e-01
   4.38849063e+00  3.14677606e-01 -1.48689138e+00 -3.42603489e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387651e+00
   5.83568898e+00 -1.96499959e+00 -8.10543473e+00  6.02060945e+00
   3.19552383e+01  1.31808267e+01  1.59283872e+01  3.10155045e+01
   1.88029114e+01  2.58402412e+01  2.62846429e+00  8.23554287e+00
   1.47738503e+01  1.40477381e+01  2.42934937e+00 -4.03664836e+00
   1.38382999e+01  7.13877917e-01 -3.92820412e+00  8.39284263e+00
  -9.63763171e+00 -1.10431324e+01  1.78171252e+00  1.60610597e+01
  -1.52672404e+00  1.73672081e+00  1.96903059e+01  2.77487188e+00
   1.41905611e+00 -5.19760934e-02  4.04890543e+00  7.14467805e+00
  -2.98074079e+00 -8.94227827e+00 -5.20874767e+00 -7.67523608e+00
   4.98566611e+00 -1.86826687e+00  4.99913245e+00 -4.65199365e+00
   1.31110006e+00  2.67216873e+00 -1.55161967e+00  1.04429863e+01
  -9.93298724e+00  1.03082763e+01  5.91865752e+00  1.49740258e+01
   9.95278742e-01  8.99334464e+00  4.19976409e+00 -1.39043616e+00
  -7.62318030e+00  5.13710711e-01  2.31417924e+00  2.85738047e-01
   3.37324619e+00 -6.25410390e+00 -1.85139380e+00  8.24068503e+00
  -2.75475104e+00  5.23516830e+00 -6.53635262e+00 -2.13038875e+00
   1.16081106e+00  3.19614948e-01 -4.36130044e+00 -3.01394958e+00
  -1.03645743e+00 -4.67432708e-01  4.61742903e+00  6.21615678e+00
   4.18148357e+00 -2.08969515e+00  2.55932796e+00  4.24143279e+00
   1.57978155e+00 -6.51612972e+00 -3.27175641e-01 -2.20328263e-01
   3.83603682e+00  3.34255041e-01 -2.65650828e+00 -1.05889624e-01
   4.67249961e+00  8.07852963e-01  2.50243549e+00 -5.62039479e+00
   2.07701155e-01  1.19823448e+00  3.73594965e+00  6.63240996e-01
  -3.03741698e+00  1.33317403e+00  1.70202060e+00  5.17846719e+00
  -2.11587677e+00 -1.77385877e+00  5.34176419e+00 -5.09703928e+00
   1.69263275e-01  6.79141562e-01 -4.26369875e+00  1.00108749e+00
  -2.91277788e+00 -3.19848684e+00  1.10562706e+00 -7.50393072e-01
   6.70005525e+00  8.18454255e-03  1.52988990e-01  2.62790949e+00
   1.90770987e+00 -6.37523675e-02  2.81479885e-01 -8.37500769e-01
   1.60293108e+00 -4.66123625e+00  3.19995128e+00  5.30864226e+00
   2.41395143e+00 -3.16024632e+00 -3.91274675e+00 -3.81357886e+00
   3.84271081e-01 -1.63354533e+00 -1.18453102e+00  2.09282896e+00
   2.49056077e+00  4.47667504e-01  3.28014591e+00 -3.29444046e+00
   1.76214068e+00  4.25761743e+00 -1.72615858e+00  2.67026310e+00
   2.33712918e+00 -9.76185222e-01  1.63516793e+00  9.43460167e-01
  -4.27234940e+00  5.59979869e-01 -6.40559572e-01 -2.26731426e+00
   3.09010170e+00  1.10817458e-01  3.02546352e+00 -2.69010541e+00
   1.18956839e-02  1.50615231e+00 -3.57955233e+00 -2.33899913e+00
   6.06664678e-02 -1.72162165e+00 -4.67493712e-01 -3.45054699e+00
  -3.61867524e+00 -1.22492333e+00 -1.03360987e+00 -2.59299402e+00
  -2.28242777e-01  7.57897135e-01  2.36115830e-01  8.88554596e-01
   1.49431708e+00  1.73063072e+00  1.68202970e+00 -3.24536224e+00
   4.73347324e-01 -2.96857205e+00 -2.91063611e-01 -2.51834232e+00
  -2.95455494e-01 -5.63419664e+00 -9.77462612e-01  9.81478938e-01
  -7.74810536e-01  1.84996437e+00 -2.39022967e+00  4.62417313e+00
   4.98220031e+00  4.55247266e-01 -1.15150046e+00  1.98267304e+00]]
******** n_components=198 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282
 0.01010298 0.00927506 0.0078448  0.00695542 0.00637769 0.00568514
 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316
 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991
 0.00290327 0.00271685 0.00264588 0.0025908  0.0025124  0.00242962
 0.00233463 0.00227578 0.00215717 0.00214654 0.00208898 0.00202043
 0.00197383 0.00191182 0.00187731 0.00184392 0.00182905 0.00178591
 0.0017406  0.00169165 0.00167838 0.00163837 0.00160243 0.00156963
 0.00155897 0.00153453 0.00150449 0.00147019 0.00146279 0.00144121
 0.00141583 0.00140901 0.00139447 0.00137534 0.00136442 0.00135851
 0.00134386 0.00131314 0.00130881 0.00129846 0.00126573 0.00125975
 0.00125025 0.00124401 0.00122939 0.00122301 0.0012099  0.00119539
 0.00119227 0.00117144 0.00116766 0.00115869 0.00115021 0.00113918
 0.00113639 0.00113106 0.00111203 0.00110684 0.0010987  0.00108973
 0.00108428 0.00107245 0.0010692  0.00106316 0.00106246 0.00105371
 0.00104742 0.00104195 0.00103446 0.00102579 0.00102183 0.00101644
 0.00100858 0.00100323 0.00100283 0.00099327 0.00099108 0.00098745
 0.00097816 0.00097671 0.00096935 0.00096531 0.00095311 0.00094949
 0.00094283 0.00094194 0.00094129 0.00093666 0.00093637 0.00092627
 0.0009187  0.00091321 0.00090719 0.00090431 0.00090401 0.00090137
 0.00090047 0.0008958  0.00089322 0.00088859 0.00088494 0.00088421
 0.00087886 0.00087619 0.00087036 0.00086721 0.00086421 0.00086141
 0.00085406 0.00085136 0.00084639 0.00084423 0.00084118 0.0008403
 0.00083451 0.00083143 0.00082723 0.00082511 0.00081791 0.00081579
 0.00081117 0.00080899 0.00080764 0.00080532 0.00080059 0.00079807
 0.00079494 0.00078953 0.00078843 0.00078565 0.00078255 0.00077795
 0.00077567 0.00076941 0.00076802 0.00076553 0.00076226 0.00076031
 0.00075741 0.00075096 0.00074764 0.0007448  0.00074372 0.00074139
 0.00073317 0.00073086 0.00072723 0.00072625 0.00072431 0.00071978
 0.0007213  0.00071225 0.00071003 0.00070806 0.00070464 0.00070062
 0.0006995  0.00069752 0.00069496 0.00069027 0.0006877  0.00068424
 0.00068241 0.00067556 0.00067437 0.00066744 0.00066618 0.00065995
 0.000658   0.00065906 0.00065245 0.00064699 0.00064369 0.00063914]
Explained Variance per Component: [237.2527247   80.24995646  67.96295031  46.54311306  40.84420643
  36.0017507   24.65460654  22.63420233  19.14389614  16.97351606
  15.56368001  13.87362375  13.43690363  12.72377827  11.63902527
  10.82235192  10.3742057    9.86664448   9.6726581    8.99693936
   8.43765709   8.00106839   7.58945317   7.46718912   7.08494025
   6.63001863   6.45683277   6.32240136   6.1310837    5.92907001
   5.69725979   5.55366442   5.26420274   5.23826307   5.09780399
   4.93052158   4.81680857   4.66548454   4.58124897   4.49977018
   4.46347932   4.35821954   4.2476418    4.12817431   4.09579235
   3.99816019   3.91046176   3.83042208   3.8043967    3.74477106
   3.67145369   3.58775545   3.56969757   3.51702276   3.45508548
   3.43845396   3.40297942   3.35627501   3.32964411   3.31522071
   3.27946756   3.20449446   3.19392156   3.16867262   3.0888094
   3.07420538   3.05103237   3.03580554   3.00012988   2.98455782
   2.95255577   2.91713772   2.90953714   2.85869972   2.84948415
   2.82758206   2.8068878    2.77998236   2.77316697   2.76015569
   2.713716     2.70104963   2.68120208   2.65931238   2.64600231
   2.61712427   2.60921261   2.59445955   2.59274489   2.57138883
   2.55605048   2.5427104    2.52441477   2.50327694   2.49360877
   2.48044227   2.46126161   2.44821943   2.44724041   2.4239018
   2.41856581   2.40970421   2.38703403   2.38348777   2.36552293
   2.35566834   2.32590654   2.31706072   2.30081572   2.29864603
   2.29705235   2.28576169   2.2850566    2.26039756   2.24192259
   2.22852906   2.21383937   2.20681      2.20607123   2.19962878
   2.19744002   2.18604575   2.17974062   2.16844816   2.15954301
   2.15776663   2.14470953   2.13820386   2.12397294   2.11627209
   2.10896418   2.10212218   2.08418364   2.07759832   2.0654635
   2.06020708   2.05274708   2.05062101   2.03648072   2.02896478
   2.01870922   2.01354498   1.99597877   1.99080289   1.9795314
   1.97420299   1.97089665   1.96525136   1.95369833   1.94754904
   1.93990608   1.92670206   1.92404001   1.91725215   1.90968661
   1.89844724   1.89288581   1.87761156   1.87422341   1.86815794
   1.86015712   1.85541475   1.84832861   1.83259344   1.82447725
   1.81755394   1.8149258    1.80922783   1.78917733   1.78354411
   1.774693     1.77228213   1.76756255   1.75649299   1.76020767
   1.73812433   1.73271359   1.7278987    1.71956329   1.70975044
   1.70701986   1.70217242   1.69592141   1.68449262   1.67822204
   1.66976659   1.66530836   1.64857799   1.64569748   1.62876239
   1.62568944   1.61048551   1.60572965   1.60831605   1.59220384
   1.57888093   1.57081966   1.55972339]
Total Variance Explained by all components: 0.49496274517137284
Shape of the reduced data:
(56318, 198)
Reduced data sample: [[ 1.33003764e+02  3.11663647e+01  1.23427211e+02  1.54590266e+01
  -2.34470218e+01 -6.45674031e+00  1.28157024e+01  4.84912826e+01
   2.18299819e+00  2.26881562e+00  3.27898757e+01  4.25133090e+01
   2.81328521e+01  3.21981327e+01  1.59423821e+01  2.61139946e+01
   9.76989935e+00 -6.71718769e+00  6.66851906e+00  1.29683160e+00
   5.85032803e-01 -7.83430715e+00  1.21220498e+01 -7.41439205e+00
   1.77307936e+01 -4.35172720e+00  2.03576145e+01  6.99062273e+00
   1.29678666e+01  6.95318993e+00  1.56952953e+01  1.44028792e+01
   1.17817869e+01 -8.79152491e+00 -7.21080199e+00  1.73206003e+01
  -1.05618729e+01  8.58450377e+00 -6.22424367e+00 -8.41590150e+00
   1.12727559e+00 -2.73803447e+00 -7.16715580e+00  1.92520407e+00
   3.01766832e+00  5.31936464e+00 -2.67366217e+00  1.18902400e+01
   6.85360471e+00  1.34467359e+01  8.03105616e-01  1.49346740e+01
   6.85124620e+00  5.68741735e+00  3.62898688e+00  1.97359449e+00
   3.06821969e+00 -2.98019613e+00  4.30539047e+00  5.96893164e+00
  -4.11412308e+00 -2.69755548e+00  1.63963520e+00  1.77479923e+00
  -4.15600226e+00 -1.62543483e+00 -6.76175644e+00  3.91454196e+00
  -1.13654055e+01 -7.21550376e+00 -3.65575286e+00  2.07576430e+00
   2.66680871e+00 -6.88648276e+00  1.37228372e+01  8.21763044e+00
  -1.61369239e+00  3.46880395e+00  4.67068137e+00  3.55463030e+00
  -5.73457229e-01 -8.75845435e+00 -3.13020747e+00 -2.62118480e-01
   6.85537546e+00 -2.86643635e+00 -5.54359493e+00 -2.64417737e+00
   4.77230337e+00  1.15549513e+00 -4.57989224e+00 -2.33722716e+00
   1.58197265e+00 -2.80645284e+00 -5.61376848e+00  2.02148231e+00
   2.47699202e+00 -1.65547054e+00 -1.34885799e+00 -7.16750639e-01
  -3.29125682e+00  1.62035331e+00 -1.14409051e+00 -4.65258461e-01
   2.43257488e+00 -3.36835426e+00  2.63955280e-01  4.80486060e+00
  -3.73564056e+00  3.88681254e+00  4.22166065e+00  1.58145085e+00
  -3.31797793e+00  6.06384390e+00  5.18980937e+00  9.84192076e-01
   1.31859608e+00  3.42333767e+00  9.35413173e+00 -3.03289210e+00
   3.05730131e+00  3.10303596e+00 -2.24844770e+00  2.94956920e+00
   8.78409208e-02  1.42109552e+00 -5.46159959e-02  2.63905946e+00
  -3.67957637e-01  4.25875366e+00  7.71253452e+00  3.00549106e+00
   3.74132554e+00 -3.71675286e+00  3.54675586e+00  2.35743069e+00
   5.36491087e+00 -4.37445235e-01  4.87482337e+00 -1.22141531e-01
  -3.35360948e+00 -1.90298690e+00  2.54585333e+00  2.46983270e+00
  -6.57665666e-01 -1.52038245e+00  7.35382727e-01  1.32045245e+00
   7.90326649e-01  2.76886965e+00  2.44477815e-01  1.72609421e+00
  -5.62987100e+00 -2.99457806e+00  8.49711192e-01  4.86790606e+00
   1.68797841e-01 -8.20519390e-03  1.99349115e+00 -2.92590205e+00
  -7.14365168e-01 -5.26325498e-01 -3.52705425e+00 -1.08885643e+00
  -1.18255783e-02  3.65849663e+00  2.77802304e-01 -8.58464333e-01
  -1.49942281e+00  3.82896800e+00  5.17965041e+00  1.32170857e+00
  -1.97310615e+00  8.01862371e-03 -1.38576226e+00  1.58313932e+00
   3.83741845e+00 -2.77012047e+00 -2.35421300e-01  4.73849475e+00
  -3.93227708e+00  3.05269925e+00 -3.51740914e+00  3.32998092e+00
   2.07842675e+00 -7.34792020e-01  7.94175280e+00 -4.13599467e+00
  -6.62064496e+00 -1.20332256e+00  4.57958624e+00  2.01789086e+00
  -4.47703297e-01  2.67588360e+00  4.93961973e-02 -2.32264834e+00
  -6.63266603e-01  4.13154331e+00]
 [ 1.21155080e+02  5.69579855e+01  8.59574217e+01 -8.90387652e+00
   5.83568898e+00 -1.96499955e+00 -8.10543465e+00  6.02060886e+00
   3.19552388e+01  1.31808312e+01  1.59283732e+01  3.10155067e+01
   1.88029056e+01  2.58402519e+01  2.62847122e+00  8.23556275e+00
   1.47737876e+01  1.40477833e+01  2.42924438e+00 -4.03674831e+00
   1.38384474e+01  7.14474752e-01 -3.92783447e+00  8.39347765e+00
  -9.63593353e+00 -1.10430570e+01  1.78361091e+00  1.60588782e+01
  -1.53027879e+00  1.73605477e+00  1.96944695e+01  2.77637684e+00
   1.41851684e+00 -4.64049542e-02  4.04856262e+00  7.15409220e+00
  -2.98168674e+00 -8.93655696e+00 -5.20823364e+00 -7.67340071e+00
   4.98222129e+00 -1.86035649e+00  5.01396031e+00 -4.63354011e+00
   1.31371340e+00  2.67238394e+00 -1.54899265e+00  1.04033860e+01
  -9.94209636e+00  1.03475046e+01  5.86855860e+00  1.49403215e+01
   1.34461547e+00  8.96657840e+00  4.28153711e+00  1.42077011e+00
  -7.51681588e+00  5.66453231e-01  2.31862008e+00  1.00192222e-01
   3.33168394e+00 -6.11252384e+00 -1.86576218e+00  8.46616961e+00
  -2.60423026e+00  5.47291139e+00 -6.30173379e+00  2.38105986e+00
   1.00770757e+00  4.78365194e-01 -4.30539559e+00 -3.60022149e+00
  -8.65515988e-01 -5.83048102e-01  4.53639338e+00  6.44569544e+00
   3.71469363e+00 -2.75227423e+00  2.23745581e+00  3.85385633e+00
   1.48130863e+00 -6.23956033e+00  6.83810546e-02  2.44858458e-01
  -4.34518614e+00  1.44744222e+00  1.88205382e+00  5.27120036e+00
   7.44400206e-01  8.99714137e-01  2.09063883e+00 -5.28865947e+00
  -8.09633400e-01  1.27625564e+00  4.79988356e+00  8.19155278e-01
   3.68208892e+00  1.26780634e+00  1.17682360e+00  5.98593627e+00
  -5.65649076e-01  4.83650761e-01  5.13142536e+00  4.14504631e+00
   5.04869585e-01 -2.44534045e+00  3.21096518e+00  5.29263937e+00
  -2.23528406e+00  2.59670050e+00  2.16874596e+00  3.51800389e+00
  -5.97573310e+00  9.26074337e-01 -2.00723781e+00 -2.19934535e+00
   2.52996294e+00 -1.25881072e-01  3.57934731e+00 -2.50456288e+00
   1.68706114e+00 -1.66449420e+00  1.59899358e+00 -6.48016113e+00
   1.71885692e+00  2.02567845e+00 -5.66839500e+00  2.45249402e+00
   2.70429690e+00  1.81268150e+00  1.59944270e+00  3.67533901e-01
   1.31971207e+00 -1.14909051e+00  4.92981523e+00  4.15988477e+00
   4.15600174e+00  5.06034038e-01  3.53614437e+00  2.25178017e+00
  -2.51944730e-01  2.85168077e+00  2.76007215e+00 -2.44488107e+00
   1.32053212e+00  1.96437424e+00 -2.69178486e-01  2.70194895e+00
  -4.51703843e-01  1.78589046e+00  1.51991366e+00  1.29347969e+00
  -3.00117389e+00  2.18734789e+00 -8.96416969e-01  1.32241288e+00
   2.72168783e+00  3.01367338e-01  3.13161148e+00 -3.08748600e-01
  -3.23025157e+00  2.26724498e+00 -6.89764715e-01 -2.13991295e+00
  -3.76996501e+00  6.56993162e-01 -1.97780333e-01 -2.61160386e-01
  -1.67358713e+00  6.27330290e+00 -8.30934332e-01 -4.57022080e+00
  -1.81129710e+00  3.50157303e-01 -1.20220020e+00  4.61066366e+00
   1.50607962e-01 -2.45890769e-01 -6.13112914e-01 -2.17664423e+00
   1.11517934e+00  7.46394436e-01 -8.88178707e-01  1.67962586e+00
   2.47140427e+00 -7.74972435e-01  2.71087856e-01  1.16371752e-02
  -5.48279067e+00  1.17901014e+00  2.96793752e+00 -3.25213160e+00
   2.50965173e-01 -3.34579950e-01 -7.82811382e-01 -4.82679479e-01
   1.24618659e+00  2.21065405e+00]]
time: 1min 46s (started: 2024-04-25 21:55:15 -07:00)
In [ ]:
pipe = make_pipeline(
    TruncatedSVD(n_components=10),
    memory="cache_svd_n")
grid = GridSearchCV(
    pipe,
    param_grid={
        "truncatedsvd__n_components":np.random.randint(20, 500, 20),
    },
    scoring='explained_variance',
    cv=5,
    refit=False,
    n_jobs=-1)

print(pipe)

grid.fit(X_fill_zero[:,1:])

2.2.1.2 Principle Component Analysis¶

In [ ]:
from sklearn.decomposition import PCA
plt.rcParams["font.family"] = "DejaVu Serif"
covar_matrix = PCA(n_components = len(X_fill_zero.columns)-1) #start from the number of all features we have
covar_matrix.fit(X_fill_zero[:,1:])
plt.ylabel( 'Eigenvalues')
plt.xlabel('# of Features')
plt.title('PCA Eigenvalues')
plt.ylim(0,max(covar_matrix.explained_variance_))
plt.style.context ('seaborn-whitegrid')
plt.plot(covar_matrix.explained_variance_)
plt.show()
No description has been provided for this image
time: 8.57 s (started: 2024-04-25 18:52:20 -07:00)
In [ ]:
from sklearn.decomposition import PCA
covar_matrix = PCA(n_components = 50) #components are equal to the number of features we have
covar_matrix.fit(X_fill_zero[:,1:])
plt.ylabel( 'Eigenvalues')
plt.xlabel('# of Features')
plt.title('PCA Eigenvalues')
plt.ylim(0,max(covar_matrix.explained_variance_))
plt.style.context ('seaborn-whitegrid')
plt.axvline(x=8, color='r', linestyle='--')
plt.plot(covar_matrix.explained_variance_)
plt.show()
No description has been provided for this image
time: 3.92 s (started: 2024-04-25 18:54:46 -07:00)
In [ ]:
from sklearn.decomposition import PCA
covar_matrix = PCA(n_components = 10) #components are equal to the number of features we have
covar_matrix.fit(X_fill_zero[:,1:])
kn = KneeLocator(range(0,10), covar_matrix.explained_variance_, curve='convex', direction='decreasing')
print(f"{kn.knee=}")
plt.ylabel( 'Eigenvalues')
plt.xlabel('# of Features')
plt.title('PCA Eigenvalues')
plt.ylim(0,max(covar_matrix.explained_variance_))
plt.style.context ('seaborn-whitegrid')
plt.axvline(x=kn.knee, color='r', linestyle='--')
plt.plot(covar_matrix.explained_variance_)
plt.show()
kn.knee=2
No description has been provided for this image
time: 2.32 s (started: 2024-04-25 19:00:01 -07:00)
In [ ]:
from kneed import KneeLocator
kn = KneeLocator(range(0,40), covar_matrix.explained_variance_, curve='convex', direction='decreasing')
print(f"{kn.knee=}")
6
time: 3.24 ms (started: 2024-04-25 18:54:19 -07:00)
In [ ]:
from kneed import KneeLocator
kn = KneeLocator(range(0,101), covar_matrix.explained_variance_, curve='convex', direction='decreasing')
print(f"{kn.knee=}")
8
time: 2.47 ms (started: 2024-04-25 18:46:08 -07:00)
In [ ]:
k = 60
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(60, random_state=42), X_fill_zero[:,1:])
No description has been provided for this image
time: 2min 1s (started: 2024-04-25 18:57:43 -07:00)
In [ ]:
k = 2
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(2, random_state=42), X_fill_zero[:,1:])
No description has been provided for this image
time: 1min 59s (started: 2024-04-25 19:00:32 -07:00)
In [ ]:
k = 3
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(3, random_state=42), X_fill_zero[:,1:])
No description has been provided for this image
time: 1min 59s (started: 2024-04-25 19:05:32 -07:00)
In [ ]:
k = 4
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(4, random_state=42), X_fill_zero[:,1:])
No description has been provided for this image
time: 1min 56s (started: 2024-04-25 19:07:55 -07:00)
In [ ]:
k = 5
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(5, random_state=42), X_fill_zero[:,1:])
No description has been provided for this image
time: 1min 55s (started: 2024-04-25 19:10:25 -07:00)
In [ ]:
k = 6
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(6, random_state=42), X_fill_zero[:,1:])
No description has been provided for this image
time: 1min 54s (started: 2024-04-25 19:12:26 -07:00)
In [ ]:
k = 7
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(7, random_state=42), X_fill_zero[:,1:])
No description has been provided for this image
time: 1min 55s (started: 2024-04-25 19:14:27 -07:00)
In [ ]:
k = 8
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(8, random_state=42), X_fill_zero[:,1:])
No description has been provided for this image
time: 1min 57s (started: 2024-04-25 19:02:37 -07:00)

2.2.2 Fill in Missing Values¶

In [ ]:
X_fill_mean = X.fill_null(strategy='mean')
X_fill_zero = X.fill_null(strategy='zero')
X_fill_min  = X.fill_null(strategy='min')
time: 236 ms (started: 2024-04-25 15:33:15 -07:00)
In [ ]:
X_fill_mean
shape: (56_318, 5_980)
userId791322571318585592959715349933561094875952296260593991141196600696895485813413091529527119868157357872998106782112852593154226653968744306337946377744581704…677089286427866304989569910611666011876013810414164815895619210919395420585728190452210812946327925594685790938199486797194116668169252203619208807224983813952797667620115967127134185997251922
catf64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64…f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64
"175325"4.04.04.54.53.54.04.03.54.04.04.54.04.04.04.04.04.04.54.54.04.54.04.03.53.54.04.03.54.04.04.03.54.04.04.04.5…3.03.53.52.5247524.02.53.54.03.40753.02.8535353.53.03.3109453.2550514.03.53.55.03.04.02.52.53.873.54.03.53.41253.2537691.53.02.03.4924243.02.03.02.645729
"22744"4.05.03.05.05.05.05.04.04.05.05.05.05.03.05.04.04.05.04.05.00.55.03.03.03.04.04.04.04.04.05.05.05.05.03.03.0…3.6040613.35753.852.53.6450784.03.5338543.6381912.02.01.03.89250.53.3109453.2550510.51.02.03.8704660.53.3877553.283.03.872.03.9923863.66753.41253.2537692.03.4070353.1994823.4924242.02.03.02.645729
"17035"1.51.55.04.54.54.03.03.52.03.55.00.55.04.00.51.04.05.04.00.54.54.54.54.51.54.00.54.04.03.6880884.54.00.54.04.04.0…2.01.03.850.53.6450783.53.5338540.53.40750.52.8535353.89252.7806122.53.2550512.04.53.4355673.8704660.52.53.03.53.873.2064683.9923863.66753.53.2537694.03.53.1994823.52.53.52.7219392.645729
"15875"3.05.05.04.05.04.04.03.54.04.05.04.03.54.54.53.03.54.05.04.04.04.05.02.53.04.04.04.05.03.04.04.03.53.54.02.5…3.03.03.852.5247523.6450782.783.5338543.6381913.40753.1947372.54.02.03.3109453.2550514.03.03.4355673.02.7562193.3877553.01.53.873.53.9923863.66753.41253.2537693.01.53.53.4924243.03.252.7219392.645729
"43703"2.55.03.54.03.53.02.52.53.52.53.53.55.03.55.04.03.54.02.53.53.53.54.02.52.04.02.53.53.53.03.53.03.52.53.03.5…1.03.03.01.53.6450782.783.5338543.6381913.40753.1947372.8535351.52.7806121.53.2550512.03.5104173.4355672.02.7562192.52.02.952023.872.53.9923862.03.41252.03.0353.4070353.1994823.4924242.03.252.7219391.5
………………………………………………………………………………………………………………………………………………………………………………………………………
"114825"4.1328164.118844.54.04.54.033314.0281874.0762314.1127614.017894.2264283.9570794.04.0084244.0460463.9784855.05.04.0061423.9339314.081143.9343574.0287353.9563863.5121073.9187743.8977363.7491194.0835593.6880883.8028613.598613.8692683.7393083.9911134.135534…3.6040613.35753.852.5247523.6450782.783.5338543.6381913.40753.1947372.8535353.89252.7806123.3109453.2550513.3826533.5104173.4355673.8704662.7562193.3877553.282.952023.873.2064683.9923863.66753.41253.2537693.0353.4070353.1994823.4924243.4772733.252.7219392.645729
"60686"4.04.54.3285114.05.04.033314.0281874.54.1127614.017895.02.05.04.04.0460463.9784853.9013114.2196314.0061423.55.03.9343574.04.02.53.9187743.8977363.54.0835593.6880884.53.598613.8692683.7393084.54.5…3.6040613.35753.852.5247523.6450782.783.5338543.6381913.40753.1947372.8535353.89252.7806123.3109453.2550513.3826533.5104173.4355673.8704662.7562193.3877553.282.952023.873.2064683.9923863.66753.41253.2537693.0353.4070353.1994823.4924243.4772733.252.7219392.645729
"71434"4.1328164.118844.3285114.1337164.2186584.033314.0281874.0762314.1127614.017895.04.55.04.0084244.0460463.9784853.9013115.04.0061423.9339315.03.9343574.0287353.9563863.5121073.9187743.8977363.7491195.03.6880883.8028613.598613.8692683.7393083.9911134.135534…3.6040613.35753.852.5247523.6450782.783.5338543.6381913.40753.1947372.8535353.89252.7806123.3109453.2550513.3826533.5104173.4355673.8704662.7562193.3877553.282.952023.873.2064683.9923863.66753.41253.2537693.0353.4070353.1994823.4924243.4772733.252.7219392.645729
"57670"4.54.54.3285114.1337164.2186584.033314.0281874.0762314.54.017894.2264284.54.0824264.0084245.05.04.54.2196314.53.9339314.081144.04.0287353.9563864.03.9187744.54.54.0835593.6880883.8028613.598613.8692683.7393083.9911134.135534…3.6040613.35753.852.5247523.6450782.783.5338543.6381913.40753.1947372.8535353.89252.7806123.3109453.2550513.3826533.5104173.4355673.8704662.7562193.3877553.282.952023.873.2064683.9923863.66753.41253.2537693.0353.4070353.1994823.4924243.4772733.252.7219392.645729
"121244"4.1328162.54.54.1337164.2186584.033314.0281874.0762314.1127614.017894.2264283.9570794.0824264.0084244.0460463.9784853.9013114.2196314.0061423.9339314.081143.9343574.0287353.9563863.5121073.9187743.8977363.7491194.0835593.6880883.8028613.598613.8692683.7393083.9911134.135534…3.6040613.35753.852.5247523.6450782.783.5338543.6381913.40753.1947372.8535353.89252.7806123.3109453.2550513.3826533.5104173.4355673.8704662.7562193.3877552.52.952023.873.2064683.9923863.66753.41253.2537693.0353.4070353.1994823.4924243.4772733.252.7219392.645729
time: 38.1 ms (started: 2024-04-25 13:45:04 -07:00)
In [ ]:
# it is hard to explain why you impute these values. so leave it for now.
time: 174 µs (started: 2024-04-25 16:33:20 -07:00)
In [ ]:
# Mean imputer
from sklearn.impute import SimpleImputer
imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
df_mean_imputed = pd.DataFrame(imp_mean.fit_transform(df_with_missing),columns = df_with_missing.columns)
time: 8.19 s (started: 2024-04-24 18:15:24 -07:00)
In [ ]:
# Most Frequent imputer
from sklearn.impute import SimpleImputer
imp_mfreq = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
df_mfreq_imputed = pd.DataFrame(imp_mfreq.fit_transform(df_with_missing))
time: 8.41 s (started: 2024-04-24 18:15:32 -07:00)

All the advanced imputer methods takes weeks to run.¶

In [ ]:
# KNN imputer
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=5)
df_knn_imputed = pd.DataFrame(imputer.fit_transform(df_with_missing),columns = df_with_missing.columns)
In [ ]:
# quick and dirty. fill in movie mean + random user factor
import miceforest as mf

kds = mf.ImputationKernel(
    df_with_missing,
    datasets=1,
    save_all_iterations=False,
    random_state=420,
)

kds.mice(1)

df_mice_forest_imputed = kds.complete_data()
In [ ]:
dir()
In [ ]:
 
In [ ]:
 
In [ ]:
 

2.3 KMeans Clustering¶

In [ ]:
# From SVD,        we have U           as the User Embeddings.
# From Imputation, we have X_fill_mean as the User Embeddings.
# From LLMs,       we have U_embed     as the User Embeddings.

# Then, we can run either KMeans or UMAP+HDBSCAN on these UEs to get User Clusters.
In [ ]:
U.shape
Out[ ]:
(56318, 10)
time: 1.19 ms (started: 2024-04-26 11:54:43 -07:00)
In [ ]:
kmeans = KMeans(n_clusters=10, random_state=0)
# Fit the KMeans algorithm to the 'U' matrix
kmeans.fit(U)
centroids = kmeans.cluster_centers_
labels = kmeans.labels_
print(f"{centroids.shape=}")
print(f"{labels[:10]=}")
centroids.shape=(10, 10)
labels[:10]=array([1, 1, 1, 1, 1, 1, 1, 1, 9, 1], dtype=int32)
time: 835 ms (started: 2024-04-26 11:58:51 -07:00)
In [ ]:
labels.shape # should be 56318x1. plug it back in X to get avg loading for movies
Out[ ]:
(56318,)
time: 1.26 ms (started: 2024-04-26 12:02:07 -07:00)
In [ ]:
np.array(np.unique(labels, return_counts=True)).T
Out[ ]:
array([[    0, 13843],
       [    1,   262],
       [    2,  2315],
       [    3,  2158],
       [    4, 21798],
       [    5,   876],
       [    6,  5820],
       [    7,  1286],
       [    8,  7293],
       [    9,   667]])
time: 1.86 ms (started: 2024-04-26 12:04:45 -07:00)
In [ ]:
# plug labels back to get movie-cluster loadings
X_L = X.with_columns(
    pl.Series('labels', labels)
).group_by('labels', maintain_order=True).mean().select(
    pl.exclude('userId')
)
X_L
Out[ ]:
shape: (10, 5_980)
labels791322571318585592959715349933561094875952296260593991141196600696895485813413091529527119868157357872998106782112852593154226653968744306337946377744581704…677089286427866304989569910611666011876013810414164815895619210919395420585728190452210812946327925594685790938199486797194116668169252203619208807224983813952797667620115967127134185997251922
i32f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64…f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64
14.04.408564.372474.26074.289374.0102464.0584.0120483.914533.975614.3735184.2430834.3313953.9090914.3012053.9402653.7982064.2454953.8355863.7845854.1186054.3636363.9028344.0374023.4878543.8325584.0443553.9184.1659753.7539684.0117193.6721313.9822133.7145833.6525423.913717…3.2753.1451613.56252.5357142.752.7096772.02.6363643.3409092.6253.0645163.252.7105262.9705883.4166673.0285713.3225813.223.531252.4705883.0681823.2727273.0480774.1785713.1555563.52.7222223.3888893.0277782.9456523.5188682.9130433.6034483.2978723.2272733.1578952.543478
94.3742244.483674.4726964.4124614.307374.2630724.2258844.1890974.2675374.225784.1597854.0737854.0506454.1168384.0962573.9792423.8161484.2467954.1947284.2162164.1107384.0383213.972654.3262643.9589913.9764074.0637964.0789064.1169593.9358773.852743.7346014.1669333.7595283.9947554.145833…3.2222223.4285713.752.93753.3181822.6590912.253.3753.5263163.1842113.1666672.6666672.81252.5454552.5833333.4166673.02.754.53.2678573.02.9090912.8571433.6785713.1785713.1666672.6666673.2916673.0925933.2631583.53.03.3695653.2631583.5714292.52.744898
53.9514694.1431254.2342034.070984.2167333.9213714.0131233.8573263.7674593.9385964.2800784.1144894.2503193.8780664.2089953.8998523.7869634.1987383.7884623.6054423.955724.1372813.8827593.6929463.2033663.7287853.7782313.5727513.9531483.5408023.8983853.4720333.6763.664043.6689713.880435…3.5833333.2441863.8695651.7037043.4166672.5217393.2142863.3846153.0384623.252.3863643.93752.3653853.0576923.2272733.406253.2205883.33.6578952.4310343.2162163.1470592.9285713.9893623.1282053.93.5263163.262.6538463.0263163.1071433.0384623.2777783.6252.8809522.4423082.264706
73.7303543.8396984.0193053.7883214.0004763.7967483.8090513.680193.6762343.7555434.3399823.8631814.0723363.7825113.94493.8793753.7576564.5022693.5259373.3756614.0639013.8657643.8849083.4987332.8641063.6710373.3260143.0814874.0191513.1839423.7274773.287773.4845643.5971433.6480053.778055…3.653.3488374.0538461.8333333.1818182.53.3235293.81252.1666672.3846151.6666674.1326532.753.1710533.1730773.3214293.5468753.451223.8939393.03.7246383.1666673.254.02.8888894.031253.8333333.3166673.02.5833333.7647062.7083333.53.53.2777782.9285711.0
24.3198764.3785274.4163914.3152494.3092924.1878884.1585184.1907534.2086214.145544.1192314.0174114.0786064.0601114.0587163.9724913.8378554.1138064.1271234.089794.1403163.968713.9772864.1819153.7590153.9512843.9949163.9529154.077663.8617933.7557933.6213594.0450243.6966084.0512594.140591…3.4722223.64.1428572.53.5606062.7708333.468753.6428573.2428573.12.7972973.1666672.8753.2272733.03.6153852.73.1111114.252.8243243.1253.1111112.8333333.8333333.4166673.03.4285713.1764713.3421053.03.3253.523.553.4642863.252.5434782.77027
33.8458884.0644034.2354143.9856074.1178113.8727893.905153.8529413.8404513.8728524.172044.0103474.1630933.8590914.1132423.8137183.7670784.0793653.7745753.6285053.8960674.0240833.8402623.648183.0830723.7196613.8073643.515143.9527913.4873273.8066623.528543.6605043.6521573.7082623.883149…3.5526323.2253.5714292.2173913.03.363.6428573.753.3684212.9583332.753.4411762.5666673.3928573.1447373.2954553.4090914.0178573.5833331.7222223.1785713.2857142.8974363.8333333.2428573.8461543.753.3529413.02.92.7647063.2857142.53.406253.252.9751.958333
64.2953024.2728264.4124184.2375514.2712724.0961024.0823114.1426074.2223864.0613224.1656733.9242074.0710864.039844.0109323.9720293.8668074.1582564.1188844.0588694.0886323.9090914.0344784.1390233.7446763.9750933.9504963.8973114.0943943.8176413.7154943.5841893.9691843.6735034.0531364.190691…3.7142863.6666673.2083332.5526323.6290322.6666673.656253.6666673.5753.5781252.7777784.2142863.2253.53.3753.3636363.42.8333333.253.082.63.8181823.0384623.2222223.02.9285713.03.53.53.3571433.4347833.153.8695653.6590913.43.0454553.0
43.9587013.8303854.0434833.946343.9727873.8675373.8502453.8496484.0102933.8672134.0385763.6906283.8152033.8953413.7744293.9165833.8947753.9768233.9781563.7986443.7347583.6015643.9205483.6297353.4659723.7934733.9303183.7796033.871143.7181213.6789093.6191753.6951653.7575063.838054.038248…3.9078953.5862073.6285713.1938784.0571432.7682933.8376623.8482143.6111113.563832.8928574.0096152.8306453.7272733.2714293.6333334.0394743.6956524.0384622.5714293.4531253.4752.6176473.553.4642864.3513513.8555563.4333333.489132.93.53.5540543.6333333.6538463.2441862.6666672.608696
04.3405464.208894.5163124.2929834.4208444.1179394.0949254.3080894.2960294.0867564.3549763.7901044.1876284.1622933.8652644.0873123.9890474.3125394.0389574.122494.2955823.7257584.2208374.107443.583294.0949833.7753943.6027664.2297183.6138163.9341523.6130153.9847573.7876554.2154244.273211…3.5925934.04.51.6253.8974363.6666673.6904764.24.0833333.2083332.6666674.02.753.13.7272733.7222223.03.1428574.03.3753.03.8753.1666673.6363643.7142864.2954554.03.968753.2254.04.03.253.8571433.6666673.01.252.5
83.955174.2178584.3454014.0758184.1359664.1406834.1809174.0554033.9085644.1426924.2890424.3680974.1324873.862594.4175324.0162133.9337244.317023.9932483.799184.0635174.2834593.9277873.9263363.3197323.7892933.9726253.7717654.0837983.6577283.8288343.631463.8753.7923293.7413794.134059…3.53.3333333.8888892.43.82.52.80.54.3333331.82.53.8333333.03.4583333.8571434.13.968753.18754.52.73.253.53.8333334.03.54.3636363.781254.53.53.73.83.8333332.03.8333333.53.3753.0
time: 83.2 ms (started: 2024-04-26 12:15:18 -07:00)
In [ ]:
X_L.write_csv('../data/X_L_movie_loadings_for_10_groups.csv')
time: 40.4 ms (started: 2024-04-26 12:12:16 -07:00)
In [ ]:
M_C_L = X_L.with_columns(
    pl.col('labels').cast(pl.Utf8)
    ).transpose(include_header=True, header_name='movieId', column_names='labels')
M_C_L.write_csv('../data/M_C_L_movie_cluster_loadings_5979x11.csv')
M_C_L
Out[ ]:
shape: (5_979, 11)
movieId1957236408
strf64f64f64f64f64f64f64f64f64f64
"79132"4.04.3742243.9514693.7303544.3198763.8458884.2953023.9587014.3405463.95517
"2571"4.408564.483674.1431253.8396984.3785274.0644034.2728263.8303854.208894.217858
"318"4.372474.4726964.2342034.0193054.4163914.2354144.4124184.0434834.5163124.345401
"58559"4.26074.4124614.070983.7883214.3152493.9856074.2375513.946344.2929834.075818
"2959"4.289374.307374.2167334.0004764.3092924.1178114.2712723.9727874.4208444.135966
……………………………
"67620"3.6034483.3695653.2777783.53.552.53.8695653.6333333.8571432.0
"115967"3.2978723.2631583.6253.53.4642863.406253.6590913.6538463.6666673.833333
"127134"3.2272733.5714292.8809523.2777783.253.253.43.2441863.03.5
"185997"3.1578952.52.4423082.9285712.5434782.9753.0454552.6666671.253.375
"251922"2.5434782.7448982.2647061.02.770271.9583333.02.6086962.53.0
time: 42.7 ms (started: 2024-04-26 12:35:52 -07:00)
In [ ]:
########### skip to 3 Prepare FT Data ###################
In [ ]:
# U.npy (56_318, 10) to do KMeans clustering to get labels

from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(2, random_state=42), U)
No description has been provided for this image
time: 55.6 s (started: 2024-04-26 00:36:01 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(5,100,5))
vis.fit(U)
vis.show()
No description has been provided for this image
Out[ ]:
<Axes: title={'center': 'Distortion Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='distortion score'>
time: 2.64 s (started: 2024-04-26 00:39:37 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(5,100,5), metric='silhouette')
vis.fit(U)
vis.show()
No description has been provided for this image
Out[ ]:
<Axes: title={'center': 'Silhouette Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='silhouette score'>
time: 7min 23s (started: 2024-04-26 00:43:13 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(5,100,5), metric='calinski_harabasz')
vis.fit(U)
vis.show()
No description has been provided for this image
Out[ ]:
<Axes: title={'center': 'Calinski Harabasz Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='calinski harabasz score'>
time: 1.27 s (started: 2024-04-26 00:50:45 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(20,40), metric='silhouette')
vis.fit(U)
vis.show()
No description has been provided for this image
Out[ ]:
<Axes: title={'center': 'Silhouette Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='silhouette score'>
time: 7min 53s (started: 2024-04-26 00:51:02 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(20,40))
vis.fit(U)
vis.show()
No description has been provided for this image
Out[ ]:
<Axes: title={'center': 'Distortion Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='distortion score'>
time: 2.62 s (started: 2024-04-26 01:11:04 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(10,40), metric='silhouette')
vis.fit(U)
vis.show()
No description has been provided for this image
Out[ ]:
<Axes: title={'center': 'Silhouette Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='silhouette score'>
time: 11min 54s (started: 2024-04-26 00:59:03 -07:00)

2.4 UMAP + HDBSCAN¶

2.4.1 MF Embeddings¶

In [ ]:
# user embeddings from NMF
U0
Out[ ]:
array([[-2.13238908e-02,  1.44819752e-02,  6.02356211e-02, ...,
        -2.10993512e-03,  1.06076370e-04, -6.22310116e-04],
       [-1.94242450e-02,  2.64664853e-02,  4.19494100e-02, ...,
        -1.50263473e-03,  4.69035863e-04,  1.42112823e-03],
       [-1.54190095e-02,  3.42269345e-03,  4.71016766e-02, ...,
        -4.83255914e-04,  6.66800463e-04, -4.94009473e-03],
       ...,
       [-1.77246506e-03, -1.52386116e-03, -4.81911236e-03, ...,
         9.96737901e-01,  1.50495452e-04, -1.64694205e-04],
       [-1.21989953e-03,  1.79720073e-03, -2.77891537e-03, ...,
         1.50093534e-04,  9.91962407e-01, -7.39255793e-05],
       [-8.48773956e-04, -2.09768266e-03, -4.46399927e-04, ...,
        -1.63915431e-04,  2.03453663e-04,  9.58858921e-01]])
time: 2.25 ms (started: 2024-04-26 02:18:46 -07:00)
In [ ]:
U0.shape
Out[ ]:
(56318, 56318)
time: 1.12 ms (started: 2024-04-26 03:03:12 -07:00)
In [ ]:
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Dimension reduction and clustering libraries
import umap
import hdbscan
import sklearn.cluster as cluster
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score
2024-04-26 03:03:55.300914: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-26 03:03:57.417630: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-26 03:04:25.388514: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
time: 1min 29s (started: 2024-04-26 03:03:24 -07:00)
In [ ]:
from sklearn.decomposition import NMF

A = X_fill_zero[:,1:]

# Initialize NMF and fit it to the matrix A
nmf = NMF(n_components=5980, init='random', random_state=0)
U = nmf.fit_transform(A)
M = nmf.components_

# Print the resulting matrices
print(f"{U[:2]=}")
print(f"{M[:2]=}")
U[:2]=array([[2.9465047 , 7.57024189, 0.        , 0.60239043, 0.55344382,
        0.        , 0.02233019, 0.9683429 , 0.        , 4.84344654],
       [4.39981346, 9.07085948, 0.        , 0.53721187, 0.68230197,
        0.        , 0.        , 0.61367887, 0.13100777, 1.24841661]])
M[:2]=array([[0.        , 0.09379923, 0.        , ..., 0.00821147, 0.        ,
        0.02712022],
       [0.13191784, 0.10947779, 0.153014  , ..., 0.01517629, 0.01982737,
        0.00635432]])
time: 20.2 s (started: 2024-04-26 00:14:37 -07:00)
In [ ]:
 

3 Prepare Finetuning Data¶

In [ ]:
# check # of movies have metadata
movies = filtered_movie_ids.join(movie_counts, on='movieId', how='left')
movies
Out[ ]:
shape: (5_981, 6)
movieIdcountimdbIdtmdbIdtitlegenres
i64u32i64i64strstr
7913247695137566627205"Inception (2010)""Action|Crime|Drama|Mystery|Sci-Fi|Thriller|IMAX"
257147209133093603"Matrix, The (1999)""Action|Sci-Fi|Thriller"
31844585111161278"Shawshank Redemption, The (1994)""Crime|Drama"
5855942725468569155"Dark Knight, The (2008)""Action|Crime|Drama|IMAX"
295941295137523550"Fight Club (1999)""Action|Crime|Drama|Thriller"
………………
1159672002268458199575"These Final Hours (2014)""Drama|Thriller"
2519222006654210581726"Infinite (2021)""Action|Sci-Fi|Thriller"
79762002090777090"Ken Park (2002)""Drama"
67620200107324114637"Nothing But the Truth (2008)""Drama|Thriller"
1859972005619332399796"Life of the Party (2018)""Comedy"
time: 4.75 ms (started: 2024-04-26 12:24:01 -07:00)
In [ ]:
plots = pl.read_csv('../../../movieplot/mpst_full_data.csv')
plots = plots.with_columns(
    imdbId=pl.col('imdb_id').str.replace(r'tt0*','').cast(pl.Int64, strict=False)
)
plots
Out[ ]:
shape: (14_828, 7)
imdb_idtitleplot_synopsistagssplitsynopsis_sourceimdbId
strstrstrstrstrstri64
"tt0057603""I tre volti della paura""Note: this synopsis is for the orginal Italian release with the segments in this certain order.Boris Karloff introduces three horror tales of the macabre and the supernatural known as the 'Three Face…"cult, horror, gothic, murder, atmospheric""train""imdb"57603
"tt1733125""Dungeons & Dragons: The Book of Vile Darkness""Two thousand years ago, Nhagruul the Foul, a sorcerer who reveled in corrupting the innocent and the spread of despair, neared the end of his mortal days and was dismayed. Consumed by hatred for the …"violence""train""imdb"1733125
"tt0033045""The Shop Around the Corner""Matuschek's, a gift store in Budapest, is the workplace of Alfred Kralik (James Stewart) and the newly hi Ed Klara Novak (Margaret Sullavan). At work they constantly irritate each other, but this dai…"romantic""test""imdb"33045
"tt0113862""Mr. Holland's Opus""Glenn Holland, not a morning person by anyone's standards, is woken up by his wife Iris early one bright September morning in 1964. Glenn has taken a job as a music teacher at the newly renamed John …"inspiring, romantic, stupid, feel-good""train""imdb"113862
"tt0086250""Scarface""In May 1980, a Cuban man named Tony Montana (Al Pacino) claims asylum, in Florida, USA, and is in search of the "American Dream" after departing Cuba in the Mariel boatlift of 1980. When questioned b…"cruelty, murder, dramatic, cult, violence, atmospheric, action, romantic, revenge, sadist""val""imdb"86250
…………………
"tt0219952""Lucky Numbers""In 1988 Russ Richards (John Travolta), the weatherman for a Harrisburg, Pennsylvania television station, is revered as a local celebrity by his viewers, and fame affords him such perks as a reserved …"comedy, murder""test""wikipedia"219952
"tt1371159""Iron Man 2""In Russia, the media covers Tony Stark's disclosure of his identity as Iron Man. Ivan Vanko, whose father Anton Vanko has just died, sees this and begins building a miniature arc reactor similar to S…"good versus evil, violence""train""wikipedia"1371159
"tt0063443""Play Dirty""During the North African Campaign in World War II, Captain Douglas (Caine) is a British Petroleum employee seconded to the Royal Engineers to oversee incoming fuel supplies for the British 8th Army. …"anti war""train""wikipedia"63443
"tt0039464""High Wall""Steven Kenet catches his unfaithful wife in the apartment of Willard I. Whitcombe, her boss, and apparently strangles her. Believing he killed her, he attempts to commit suicide by driving his car in…"murder""test""wikipedia"39464
"tt0235166""Against All Hope""Sometime in the 1950s in Chicago a man, Cecil Moe (Michael Madsen) returns home from work with his friend, Joe Cleveland. When Cecil arrives home, he finds his wife Jean and her friend Shannon pourin…"christian film""test""wikipedia"235166
time: 781 ms (started: 2024-04-26 12:24:10 -07:00)
In [ ]:
# 1756 null plots, scrape later on IMDb
movies = movies.join(plots, on='imdbId', how='left')
movies['plot_synopsis'].is_null().sum()
Out[ ]:
1756
time: 3.48 ms (started: 2024-04-26 12:24:33 -07:00)
In [ ]:
FT_D = movies.drop_nulls().with_columns(
    pl.col('movieId').cast(pl.Utf8)
    ).join(
        M_C_L,
        on='movieId'
)
FT_D.select(
    ['movieId', 'title', 'genres', 'imdb_id', 'plot_synopsis', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
)
Out[ ]:
shape: (4_225, 15)
movieIdtitlegenresimdb_idplot_synopsis0123456789
strstrstrstrstrf64f64f64f64f64f64f64f64f64f64
"79132""Inception (2010)""Action|Crime|Drama|Mystery|Sci-Fi|Thriller|IMAX""tt1375666""A young man, exhausted and delirious, washes up on a beach, looking up momentarily to see two young children (Claire Geare and Magnus Nolan) playing in the sand before he passes out. An armed guard (…4.3405464.04.3198763.8458883.9587013.9514694.2953023.7303543.955174.374224
"2571""Matrix, The (1999)""Action|Sci-Fi|Thriller""tt0133093""The screen is filled with green, cascading code which gives way to the title, The Matrix.A phone rings and text appears on the screen: "Call trans opt: received. 2-19-98 13:24:18 REC: Log>" As a conv…4.208894.408564.3785274.0644033.8303854.1431254.2728263.8396984.2178584.48367
"318""Shawshank Redemption, The (1994)""Crime|Drama""tt0111161""In 1947, Andy Dufresne (Tim Robbins), a banker in Maine, is convicted of murdering his wife and her lover, a golf pro. Since the state of Maine has no death penalty, he is given two consecutive life …4.5163124.372474.4163914.2354144.0434834.2342034.4124184.0193054.3454014.472696
"58559""Dark Knight, The (2008)""Action|Crime|Drama|IMAX""tt0468569""The movie begins with a gang of men with clown masks breaking into the bank where the mob has a large portion of their money stashed. It begins with five clowns, each getting a cut of the spoils. The…4.2929834.26074.3152493.9856073.946344.070984.2375513.7883214.0758184.412461
"2959""Fight Club (1999)""Action|Crime|Drama|Thriller""tt0137523""We back out of the webbing of neurons and brain cells as the title credits appear, finding ourselves emerging from the sweat-glistened skin of the protagonist: our narrator (Edward Norton), as he loo…4.4208444.289374.3092924.1178113.9727874.2167334.2712724.0004764.1359664.30737
………………………………………
"93819""Absentia (2011)""Horror""tt1610996""As the film begins we see Tricia removing old posters and replacing them with new ones. When she comes home, she finds that her younger sister Callie has arrived. She comes to live with her as the pr…3.8753.2727273.1111113.2857143.4753.1470593.8181823.1666673.52.909091
"81""Things to Do in Denver When You're Dead (1995)""Crime|Drama|Romance""tt0114660""Jimmy "The Saint" Tosnia is an ex-gangster living in Denver. Jimmy left the criminal world, to "go straight" with his "Afterlife Advice" business, where dying people videotape messages for their love…4.02.9456523.02.92.93.0263163.3571432.5833333.73.263158
"3952""Contender, The (2000)""Drama|Thriller""tt0208874""Second-term Democratic U.S. President Jackson Evans must select a new Vice President following the sudden death of his previous vice president. The obvious choice seems to be Virginia Governor Jack H…4.03.5188683.3252.7647063.53.1071433.4347833.7647063.83.5
"7976""Ken Park (2002)""Drama""tt0209077""The opening of the film depicts teenager Ken Park (nicknamed "Krap Nek", which is his first and last name spelled and pronounced backward) skateboarding across Visalia, California. He arrives at a sk…3.252.9130433.523.2857143.5540543.0384623.152.7083333.8333333.0
"67620""Nothing But the Truth (2008)""Drama|Thriller""tt1073241""Robert Bennett (Richard Dix) is a stockbroker who is very carefree with other people's money. Encouraging clients to buy stocks in companies that are failing is all in a day's work to him. His fiancé…3.8571433.6034483.552.53.6333333.2777783.8695653.52.03.369565
time: 6.34 ms (started: 2024-04-26 12:44:50 -07:00)
In [ ]:
FT_D.write_ndjson('../data/FT_D_raw_4225x15.jsonl')
time: 65.7 ms (started: 2024-04-26 12:59:13 -07:00)
In [ ]:
# now how to construct the jsonl file format to best represent what we want to do?
# DSPy package:
# https://github.com/stanfordnlp/dspy/blob/main/dspy/teleprompt/finetune.py
In [ ]:
 
In [ ]:
 

4 Finetune a BERT-based model¶

ftd_X = list(FT_D['plot_synopsis']) ftd_y = list(FT_D['0'])

In [ ]:
 
In [ ]:
 
In [ ]:
 

5 Finetune a GPT-based model¶

In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

6 Finetune a Mixtral of Experts model¶

In [ ]:
# domain expert on movie industry, human interfaces
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: